1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/Analysis/AssumeBundleQueries.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/InstructionSimplify.h"
27#include "llvm/Analysis/Loads.h"
28#include "llvm/Analysis/MemoryBuiltins.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/Analysis/VectorUtils.h"
31#include "llvm/IR/AttributeMask.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GlobalVariable.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicInst.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/PatternMatch.h"
54#include "llvm/IR/ProfDataUtils.h"
55#include "llvm/IR/Statepoint.h"
56#include "llvm/IR/Type.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/IR/ValueHandle.h"
60#include "llvm/Support/AtomicOrdering.h"
61#include "llvm/Support/Casting.h"
62#include "llvm/Support/CommandLine.h"
63#include "llvm/Support/Compiler.h"
64#include "llvm/Support/Debug.h"
65#include "llvm/Support/ErrorHandling.h"
66#include "llvm/Support/KnownBits.h"
67#include "llvm/Support/KnownFPClass.h"
68#include "llvm/Support/MathExtras.h"
69#include "llvm/Support/TypeSize.h"
70#include "llvm/Support/raw_ostream.h"
71#include "llvm/Transforms/InstCombine/InstCombiner.h"
72#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <optional>
79#include <utility>
80#include <vector>
81
82#define DEBUG_TYPE "instcombine"
83#include "llvm/Transforms/Utils/InstructionWorklist.h"
84
85using namespace llvm;
86using namespace PatternMatch;
87
// Pass statistic: counts library calls simplified by this file; reported via
// -stats under the "instcombine" DEBUG_TYPE.
88STATISTIC(NumSimplified, "Number of library calls simplified");
89
// Command-line knob controlling how many instructions past a guard intrinsic
// we are willing to scan when looking for an adjacent guard to merge with.
90static cl::opt<unsigned> GuardWideningWindow(
91 "instcombine-guard-widening-window",
92 cl::init(Val: 3),
93 cl::desc("How wide an instruction window to bypass looking for "
94 "another guard"));
95
96/// Return the specified type promoted as it would be to pass though a va_arg
97/// area.
98static Type *getPromotedType(Type *Ty) {
99 if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) {
100 if (ITy->getBitWidth() < 32)
101 return Type::getInt32Ty(C&: Ty->getContext());
102 }
103 return Ty;
104}
105
106/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107/// TODO: This should probably be integrated with visitAllocSites, but that
108/// requires a deeper change to allow either unread or unwritten objects.
109static bool hasUndefSource(AnyMemTransferInst *MI) {
110 auto *Src = MI->getRawSource();
111 while (isa<GetElementPtrInst>(Val: Src)) {
112 if (!Src->hasOneUse())
113 return false;
114 Src = cast<Instruction>(Val: Src)->getOperand(i: 0);
115 }
116 return isa<AllocaInst>(Val: Src) && Src->hasOneUse();
117}
118
/// Simplify a memcpy/memmove-like intrinsic (plain or atomic): raise its
/// alignment arguments to the best known values, delete provably-dead
/// transfers by zeroing their length, and expand small constant-size copies
/// (1/2/4/8 bytes) into a single integer load+store pair.
/// Returns MI (modified in place) when a change was made, otherwise nullptr.
119Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  // Canonicalize the destination alignment upward first; returning MI makes
  // the combiner revisit the (now-changed) instruction.
120 Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
121 MaybeAlign CopyDstAlign = MI->getDestAlign();
122 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
123 MI->setDestAlignment(DstAlign);
124 return MI;
125 }
126
  // Same canonicalization for the source alignment.
127 Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
128 MaybeAlign CopySrcAlign = MI->getSourceAlign();
129 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
130 MI->setSourceAlignment(SrcAlign);
131 return MI;
132 }
133
134 // If we have a store to a location which is known constant, we can conclude
135 // that the store must be storing the constant value (else the memory
136 // wouldn't be constant), and this must be a noop.
137 if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
138 // Set the size of the copy to 0, it will be deleted on the next iteration.
139 MI->setLength((uint64_t)0);
140 return MI;
141 }
142
143 // If the source is provably undef, the memcpy/memmove doesn't do anything
144 // (unless the transfer is volatile).
145 if (hasUndefSource(MI) && !MI->isVolatile()) {
146 // Set the size of the copy to 0, it will be deleted on the next iteration.
147 MI->setLength((uint64_t)0);
148 return MI;
149 }
150
151 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152 // load/store.
153 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
154 if (!MemOpLength) return nullptr;
155
156 // Source and destination pointer types are always "i8*" for intrinsic. See
157 // if the size is something we can handle with a single primitive load/store.
158 // A single load+store correctly handles overlapping memory in the memmove
159 // case.
160 uint64_t Size = MemOpLength->getLimitedValue();
161 assert(Size && "0-sized memory transferring should be removed already.");
162
  // Size&(Size-1) is nonzero for non-powers-of-two, so this admits only
  // 1, 2, 4 and 8.
163 if (Size > 8 || (Size&(Size-1)))
164 return nullptr; // If not 1/2/4/8 bytes, exit.
165
166 // If it is an atomic and alignment is less than the size then we will
167 // introduce the unaligned memory access which will be later transformed
168 // into libcall in CodeGen. This is not evident performance gain so disable
169 // it now.
170 if (MI->isAtomic())
171 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
172 return nullptr;
173
174 // Use an integer load+store unless we can find something better.
175 IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3);
176
177 // If the memcpy has metadata describing the members, see if we can get the
178 // TBAA, scope and noalias tags describing our copy.
179 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);
180
181 Value *Src = MI->getArgOperand(i: 1);
182 Value *Dest = MI->getArgOperand(i: 0);
183 LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
184 // Alignment from the mem intrinsic will be better, so use it.
185 L->setAlignment(*CopySrcAlign);
186 L->setAAMetadata(AACopyMD);
  // Carry over loop-parallelism and access-group metadata so vectorization
  // legality is not pessimized by the expansion.
187 MDNode *LoopMemParallelMD =
188 MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
189 if (LoopMemParallelMD)
190 L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
191 MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
192 if (AccessGroupMD)
193 L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
194
195 StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
196 // Alignment from the mem intrinsic will be better, so use it.
197 S->setAlignment(*CopyDstAlign);
198 S->setAAMetadata(AACopyMD);
199 if (LoopMemParallelMD)
200 S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
201 if (AccessGroupMD)
202 S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
  // Preserve debug-assignment tracking on the replacement store.
203 S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
204
205 if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
206 // non-atomics can be volatile
207 L->setVolatile(MT->isVolatile());
208 S->setVolatile(MT->isVolatile());
209 }
210 if (MI->isAtomic()) {
211 // atomics have to be unordered
212 L->setOrdering(AtomicOrdering::Unordered);
213 S->setOrdering(AtomicOrdering::Unordered);
214 }
215
216 // Set the size of the copy to 0, it will be deleted on the next iteration.
217 MI->setLength((uint64_t)0);
218 return MI;
219}
220
/// Simplify a memset-like intrinsic (plain or atomic): raise its destination
/// alignment, delete provably-dead or undef-valued sets by zeroing their
/// length, and expand small constant-size sets (1/2/4/8 bytes with a constant
/// i8 fill value) into a single store of a splatted integer.
/// Returns MI (modified in place) when a change was made, otherwise nullptr.
221Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  // Canonicalize the destination alignment upward; returning MI revisits the
  // now-changed instruction.
222 const Align KnownAlignment =
223 getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
224 MaybeAlign MemSetAlign = MI->getDestAlign();
225 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
226 MI->setDestAlignment(KnownAlignment);
227 return MI;
228 }
229
230 // If we have a store to a location which is known constant, we can conclude
231 // that the store must be storing the constant value (else the memory
232 // wouldn't be constant), and this must be a noop.
233 if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
234 // Set the size of the copy to 0, it will be deleted on the next iteration.
235 MI->setLength((uint64_t)0);
236 return MI;
237 }
238
239 // Remove memset with an undef value.
240 // FIXME: This is technically incorrect because it might overwrite a poison
241 // value. Change to PoisonValue once #52930 is resolved.
242 if (isa<UndefValue>(Val: MI->getValue())) {
243 // Set the size of the copy to 0, it will be deleted on the next iteration.
244 MI->setLength((uint64_t)0);
245 return MI;
246 }
247
248 // Extract the length and alignment and fill if they are constant.
249 ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
250 ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
251 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8))
252 return nullptr;
253 const uint64_t Len = LenC->getLimitedValue();
254 assert(Len && "0-sized memory setting should be removed already.");
255 const Align Alignment = MI->getDestAlign().valueOrOne();
256
257 // If it is an atomic and alignment is less than the size then we will
258 // introduce the unaligned memory access which will be later transformed
259 // into libcall in CodeGen. This is not evident performance gain so disable
260 // it now.
261 if (MI->isAtomic() && Alignment < Len)
262 return nullptr;
263
264 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
265 if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) {
266 Value *Dest = MI->getDest();
267
268 // Extract the fill value and store.
  // Splat the i8 fill byte across an integer of Len*8 bits.
269 Constant *FillVal = ConstantInt::get(
270 Context&: MI->getContext(), V: APInt::getSplat(NewLen: Len * 8, V: FillC->getValue()));
271 StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
272 S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
  // Debug assignment records that referred to the original fill byte must
  // now refer to the widened splat constant.
273 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(Inst: S)) {
274 if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: FillC))
275 DbgAssign->replaceVariableLocationOp(OldValue: FillC, NewValue: FillVal);
276 }
277
278 S->setAlignment(Alignment);
279 if (MI->isAtomic())
280 S->setOrdering(AtomicOrdering::Unordered);
281
282 // Set the size of the copy to 0, it will be deleted on the next iteration.
283 MI->setLength((uint64_t)0);
284 return MI;
285 }
286
287 return nullptr;
288}
289
290// TODO, Obvious Missing Transforms:
291// * Narrow width by halfs excluding zero/undef lanes
292Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293 Value *LoadPtr = II.getArgOperand(i: 0);
294 const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne();
295
296 // If the mask is all ones or undefs, this is a plain vector load of the 1st
297 // argument.
298 if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 1))) {
299 LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
300 Name: "unmaskedload");
301 L->copyMetadata(SrcInst: II);
302 return L;
303 }
304
305 // If we can unconditionally load from this address, replace with a
306 // load/select idiom. TODO: use DT for context sensitive query
307 if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(),
308 DL: II.getDataLayout(), CtxI: &II, AC: &AC)) {
309 LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
310 Name: "unmaskedload");
311 LI->copyMetadata(SrcInst: II);
312 return Builder.CreateSelect(C: II.getArgOperand(i: 1), True: LI, False: II.getArgOperand(i: 2));
313 }
314
315 return nullptr;
316}
317
318// TODO, Obvious Missing Transforms:
319// * Single constant active lane -> store
320// * Narrow width by halfs excluding zero/undef lanes
321Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
322 Value *StorePtr = II.getArgOperand(i: 1);
323 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
324 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2));
325 if (!ConstMask)
326 return nullptr;
327
328 // If the mask is all zeros, this instruction does nothing.
329 if (maskIsAllZeroOrUndef(Mask: ConstMask))
330 return eraseInstFromFunction(I&: II);
331
332 // If the mask is all ones, this is a plain vector store of the 1st argument.
333 if (maskIsAllOneOrUndef(Mask: ConstMask)) {
334 StoreInst *S =
335 new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment);
336 S->copyMetadata(SrcInst: II);
337 return S;
338 }
339
340 if (isa<ScalableVectorType>(Val: ConstMask->getType()))
341 return nullptr;
342
343 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
344 APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
345 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
346 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts,
347 PoisonElts))
348 return replaceOperand(I&: II, OpNum: 0, V);
349
350 return nullptr;
351}
352
353// TODO, Obvious Missing Transforms:
354// * Single constant active lane load -> load
355// * Dereferenceable address & few lanes -> scalarize speculative load/selects
356// * Adjacent vector addresses -> masked.load
357// * Narrow width by halfs excluding zero/undef lanes
358// * Vector incrementing address -> vector masked load
359Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
360 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 1));
361 if (!ConstMask)
362 return nullptr;
363
364 // Vector splat address w/known mask -> scalar load
365 // Fold the gather to load the source vector first lane
366 // because it is reloading the same value each time
367 if (ConstMask->isAllOnesValue())
368 if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) {
369 auto *VecTy = cast<VectorType>(Val: II.getType());
370 const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne();
371 LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr,
372 Align: Alignment, Name: "load.scalar");
373 Value *Shuf =
374 Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast");
375 return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf));
376 }
377
378 return nullptr;
379}
380
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
// Simplify llvm.masked.scatter: drop dead scatters, turn splat-pointer forms
// into a single scalar store, and shrink operands via demanded elements.
386Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
387 auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2));
388 if (!ConstMask)
389 return nullptr;
390
391 // If the mask is all zeros, a scatter does nothing.
392 if (maskIsAllZeroOrUndef(Mask: ConstMask))
393 return eraseInstFromFunction(I&: II);
394
395 // Vector splat address -> scalar store
396 if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) {
397 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
  // All active lanes write the same value to the same address, so one
  // scalar store is equivalent as long as at least one lane is active.
398 if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) {
399 if (maskContainsAllOneOrUndef(Mask: ConstMask)) {
400 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
401 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
402 Alignment);
403 S->copyMetadata(SrcInst: II);
404 return S;
405 }
406 }
407 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408 // lastlane), ptr
  // All lanes hit the same address, so only the last lane's value survives;
  // the last-lane index is computed at runtime to handle scalable vectors.
409 if (ConstMask->isAllOnesValue()) {
410 Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
411 VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType());
412 ElementCount VF = WideLoadTy->getElementCount();
413 Value *RunTimeVF = Builder.CreateElementCount(Ty: Builder.getInt32Ty(), EC: VF);
414 Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1));
415 Value *Extract =
416 Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane);
417 StoreInst *S =
418 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
419 S->copyMetadata(SrcInst: II);
420 return S;
421 }
422 }
  // Demanded-elements reasoning below requires a fixed-width vector.
423 if (isa<ScalableVectorType>(Val: ConstMask->getType()))
424 return nullptr;
425
426 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427 APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
428 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
429 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts,
430 PoisonElts))
431 return replaceOperand(I&: II, OpNum: 0, V);
432 if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts,
433 PoisonElts))
434 return replaceOperand(I&: II, OpNum: 1, V);
435
436 return nullptr;
437}
438
439/// This function transforms launder.invariant.group and strip.invariant.group
440/// like:
441/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442/// launder(strip(%x)) -> launder(%x)
443/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444/// strip(launder(%x)) -> strip(%x)
445/// This is legal because it preserves the most recent information about
446/// the presence or absence of invariant.group.
447static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
448 InstCombinerImpl &IC) {
449 auto *Arg = II.getArgOperand(i: 0);
450 auto *StrippedArg = Arg->stripPointerCasts();
451 auto *StrippedInvariantGroupsArg = StrippedArg;
  // Walk down through any chain of launder/strip calls (looking through
  // pointer casts between them) to find the original pointer.
452 while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
453 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455 break;
456 StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts();
457 }
458 if (StrippedArg == StrippedInvariantGroupsArg)
459 return nullptr; // No launders/strips to remove.
460
461 Value *Result = nullptr;
462
  // Re-emit a single launder/strip (matching this call's kind) on the
  // fully-stripped pointer.
463 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464 Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
465 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466 Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
467 else
468 llvm_unreachable(
469 "simplifyInvariantGroupIntrinsic only handles launder and strip");
  // Stripping pointer casts may have changed the address space; cast back so
  // the replacement has the same type as the original call.
470 if (Result->getType()->getPointerAddressSpace() !=
471 II.getType()->getPointerAddressSpace())
472 Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());
473
474 return cast<Instruction>(Val: Result);
475}
476
/// Fold llvm.cttz / llvm.ctlz calls: strip operations that do not change the
/// count (negation, abs, bit-permutes), narrow through zext, constant-fold
/// via known bits, and otherwise attach a range attribute describing the
/// possible results. Returns the replacement instruction, the (mutated) call
/// itself, or nullptr if nothing changed.
477static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
478 assert((II.getIntrinsicID() == Intrinsic::cttz ||
479 II.getIntrinsicID() == Intrinsic::ctlz) &&
480 "Expected cttz or ctlz intrinsic");
481 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482 Value *Op0 = II.getArgOperand(i: 0);
  // Op1 is the i1 'is_zero_poison' flag of the intrinsic.
483 Value *Op1 = II.getArgOperand(i: 1);
484 Value *X;
485 // ctlz(bitreverse(x)) -> cttz(x)
486 // cttz(bitreverse(x)) -> ctlz(x)
487 if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
488 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489 Function *F =
490 Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
491 return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)});
492 }
493
494 if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) {
495 // ctlz/cttz i1 Op0 --> not Op0
496 if (match(V: Op1, P: m_Zero()))
497 return BinaryOperator::CreateNot(Op: Op0);
498 // If zero is poison, then the input can be assumed to be "true", so the
499 // instruction simplifies to "false".
500 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501 return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
502 }
503
504 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505 if (II.hasOneUse() && match(V: Op1, P: m_Zero()) &&
506 match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) {
507 II.dropUBImplyingAttrsAndMetadata();
508 return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
509 }
510
511 Constant *C;
512
513 if (IsTZ) {
514 // cttz(-x) -> cttz(x)
515 if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
516 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
517
518 // cttz(-x & x) -> cttz(x)
519 if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
520 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
521
522 // cttz(sext(x)) -> cttz(zext(x))
  // The sign-extension bits sit above the trailing zeros, so zext gives the
  // same count and is easier to narrow further below.
523 if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
524 auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
525 auto *CttzZext =
526 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1);
527 return IC.replaceInstUsesWith(I&: II, V: CttzZext);
528 }
529
530 // Zext doesn't change the number of trailing zeros, so narrow:
531 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532 if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
533 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X,
534 RHS: IC.Builder.getTrue())
535 auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
536 return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
537 }
538
539 // cttz(abs(x)) -> cttz(x)
540 // cttz(nabs(x)) -> cttz(x)
541 Value *Y;
542 SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
543 if (SPF == SPF_ABS || SPF == SPF_NABS)
544 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
545
546 if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X))))
547 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
548
549 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550 if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
551 match(V: Op1, P: m_One())) {
552 Value *ConstCttz =
553 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
554 return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
555 }
556
557 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558 if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
559 match(V: Op1, P: m_One())) {
560 Value *ConstCttz =
561 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
562 return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
563 }
564
565 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566 if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) {
567 Value *Width =
568 ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits());
569 return BinaryOperator::CreateSub(V1: Width, V2: X);
570 }
571 } else {
572 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573 if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
574 match(V: Op1, P: m_One())) {
575 Value *ConstCtlz =
576 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
577 return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
578 }
579
580 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581 if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
582 match(V: Op1, P: m_One())) {
583 Value *ConstCtlz =
584 IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
585 return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
586 }
587
588 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
589 if (Op0->hasOneUse() &&
590 match(V: Op0,
591 P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
592 Type *Ty = II.getType();
593 unsigned BitWidth = Ty->getScalarSizeInBits();
594 auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
595 Args: {X, IC.Builder.getFalse()});
596 auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
597 return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
598 }
599 }
600
601 // cttz(Pow2) -> Log2(Pow2)
602 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
603 if (auto *R = IC.tryGetLog2(Op: Op0, AssumeNonZero: match(V: Op1, P: m_One()))) {
604 if (IsTZ)
605 return IC.replaceInstUsesWith(I&: II, V: R);
606 BinaryOperator *BO = BinaryOperator::CreateSub(
607 V1: ConstantInt::get(Ty: R->getType(), V: R->getType()->getScalarSizeInBits() - 1),
608 V2: R);
609 BO->setHasNoSignedWrap();
610 BO->setHasNoUnsignedWrap();
611 return BO;
612 }
613
614 KnownBits Known = IC.computeKnownBits(V: Op0, CxtI: &II);
615
616 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
617 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
618 : Known.countMaxLeadingZeros();
619 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
620 : Known.countMinLeadingZeros();
621
622 // If all bits above (ctlz) or below (cttz) the first known one are known
623 // zero, this value is constant.
624 // FIXME: This should be in InstSimplify because we're replacing an
625 // instruction with a constant.
626 if (PossibleZeros == DefiniteZeros) {
627 auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
628 return IC.replaceInstUsesWith(I&: II, V: C);
629 }
630
631 // If the input to cttz/ctlz is known to be non-zero,
632 // then change the 'ZeroIsPoison' parameter to 'true'
633 // because we know the zero behavior can't affect the result.
634 if (!Known.One.isZero() ||
635 isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) {
636 if (!match(V: II.getArgOperand(i: 1), P: m_One()))
637 return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
638 }
639
640 // Add range attribute since known bits can't completely reflect what we know.
  // The result lies in [DefiniteZeros, PossibleZeros]; skip if a range is
  // already attached.
641 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
642 if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) &&
643 !II.getMetadata(KindID: LLVMContext::MD_range)) {
644 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
645 APInt(BitWidth, PossibleZeros + 1));
646 II.addRangeRetAttr(CR: Range);
647 return &II;
648 }
649
650 return nullptr;
651}
652
/// Fold llvm.ctpop calls: look through bit-permuting ops (bswap, bitreverse,
/// rotates), recognize cttz-equivalent bit tricks, narrow through zext, use
/// known bits to reduce to a shift or compare, and otherwise attach a result
/// range attribute. Returns the replacement instruction, the (mutated) call
/// itself, or nullptr if nothing changed.
653static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
654 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
655 "Expected ctpop intrinsic");
656 Type *Ty = II.getType();
657 unsigned BitWidth = Ty->getScalarSizeInBits();
658 Value *Op0 = II.getArgOperand(i: 0);
659 Value *X, *Y;
660
661 // ctpop(bitreverse(x)) -> ctpop(x)
662 // ctpop(bswap(x)) -> ctpop(x)
  // Bit permutations don't change the number of set bits.
663 if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
664 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
665
666 // ctpop(rot(x)) -> ctpop(x)
  // A funnel shift with both inputs equal to x is a rotate of x.
667 if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) ||
668 match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
669 X == Y)
670 return IC.replaceOperand(I&: II, OpNum: 0, V: X);
671
672 // ctpop(x | -x) -> bitwidth - cttz(x, false)
673 if (Op0->hasOneUse() &&
674 match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
675 auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
676 Args: {X, IC.Builder.getFalse()});
677 auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
678 return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
679 }
680
681 // ctpop(~x & (x - 1)) -> cttz(x, false)
682 if (match(V: Op0,
683 P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
684 Function *F =
685 Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty);
686 return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
687 }
688
689 // Zext doesn't change the number of set bits, so narrow:
690 // ctpop (zext X) --> zext (ctpop X)
691 if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
692 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X);
693 return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
694 }
695
696 KnownBits Known(BitWidth);
697 IC.computeKnownBits(V: Op0, Known, CxtI: &II);
698
699 // If all bits are zero except for exactly one fixed bit, then the result
700 // must be 0 or 1, and we can get that answer by shifting to LSB:
701 // ctpop (X & 32) --> (X & 32) >> 5
702 // TODO: Investigate removing this as its likely unnecessary given the below
703 // `isKnownToBeAPowerOfTwo` check.
704 if ((~Known.Zero).isPowerOf2())
705 return BinaryOperator::CreateLShr(
706 V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));
707
708 // More generally we can also handle non-constant power of 2 patterns such as
709 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
710 // ctpop(Pow2OrZero) --> icmp ne X, 0
711 if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true))
712 return CastInst::Create(Instruction::ZExt,
713 S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
714 RHS: Constant::getNullValue(Ty)),
715 Ty);
716
717 // Add range attribute since known bits can't completely reflect what we know.
718 if (BitWidth != 1) {
719 ConstantRange OldRange =
720 II.getRange().value_or(u: ConstantRange::getFull(BitWidth));
721
  // Result is in [min population, max population]; tighten the lower bound
  // to 1 when the input is known non-zero.
722 unsigned Lower = Known.countMinPopulation();
723 unsigned Upper = Known.countMaxPopulation() + 1;
724
725 if (Lower == 0 && OldRange.contains(Val: APInt::getZero(numBits: BitWidth)) &&
726 isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II)))
727 Lower = 1;
728
729 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
730 Range = Range.intersectWith(CR: OldRange, Type: ConstantRange::Unsigned);
731
  // Only report a change when the attached range actually improves.
732 if (Range != OldRange) {
733 II.addRangeRetAttr(CR: Range);
734 return &II;
735 }
736 }
737
738 return nullptr;
739}
740
/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
/// at most two source operands are actually referenced.
///
/// tbl selects bytes from a concatenation of source vectors; out-of-range
/// indices produce zero. tbx is identical except out-of-range indices select
/// the corresponding lane of an extra "fallback" first operand.
static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC,
                                    bool IsExtension) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: II.arg_size() - 1));
  if (!C)
    return nullptr;

  auto *RetTy = cast<FixedVectorType>(Val: II.getType());
  unsigned NumIndexes = RetTy->getNumElements();

  // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
  if (!RetTy->getElementType()->isIntegerTy(Bitwidth: 8) ||
      (NumIndexes != 8 && NumIndexes != 16))
    return nullptr;

  // For tbx instructions, the first argument is the "fallback" vector, which
  // has the same length as the mask and return type.
  unsigned int StartIndex = (unsigned)IsExtension;
  auto *SourceTy =
      cast<FixedVectorType>(Val: II.getArgOperand(i: StartIndex)->getType());
  // Note that the element count of each source vector does *not* need to be the
  // same as the element count of the return type and mask! All source vectors
  // must have the same element count as each other, though.
  unsigned NumElementsPerSource = SourceTy->getNumElements();

  // There are no tbl/tbx intrinsics for which the destination size exceeds the
  // source size. However, our definitions of the intrinsics, at least in
  // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
  // *could* technically happen.
  if (NumIndexes > NumElementsPerSource)
    return nullptr;

  // The tbl/tbx intrinsics take several source operands followed by a mask
  // operand.
  unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;

  // Map input operands to shuffle indices. This also helpfully deduplicates the
  // input arguments, in case the same value is passed as an argument multiple
  // times.
  SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
  // A shufflevector has at most two inputs; slots start out as poison and are
  // filled in as sources are discovered.
  Value *ShuffleOperands[2] = {PoisonValue::get(T: SourceTy),
                               PoisonValue::get(T: SourceTy)};

  // Shuffle mask being built; -1 marks a poison (don't-care) lane.
  int Indexes[16];
  for (unsigned I = 0; I < NumIndexes; ++I) {
    Constant *COp = C->getAggregateElement(Elt: I);

    // Each mask element must be a constant integer or undef/poison.
    if (!COp || (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
      return nullptr;

    if (isa<UndefValue>(Val: COp)) {
      Indexes[I] = -1;
      continue;
    }

    uint64_t Index = cast<ConstantInt>(Val: COp)->getZExtValue();
    // The index of the input argument that this index references (0 = first
    // source argument, etc).
    unsigned SourceOperandIndex = Index / NumElementsPerSource;
    // The index of the element at that source operand.
    unsigned SourceOperandElementIndex = Index % NumElementsPerSource;

    Value *SourceOperand;
    if (SourceOperandIndex >= NumSourceOperands) {
      // This index is out of bounds. Map it to index into either the fallback
      // vector (tbx) or vector of zeroes (tbl).
      SourceOperandIndex = NumSourceOperands;
      if (IsExtension) {
        // For out-of-bounds indices in tbx, choose the `I`th element of the
        // fallback.
        SourceOperand = II.getArgOperand(i: 0);
        SourceOperandElementIndex = I;
      } else {
        // Otherwise, choose some element from the dummy vector of zeroes (we'll
        // always choose the first).
        SourceOperand = Constant::getNullValue(Ty: SourceTy);
        SourceOperandElementIndex = 0;
      }
    } else {
      SourceOperand = II.getArgOperand(i: SourceOperandIndex + StartIndex);
    }

    // The source operand may be the fallback vector, which may not have the
    // same number of elements as the source vector. In that case, we *could*
    // choose to extend its length with another shufflevector, but it's simpler
    // to just bail instead.
    if (cast<FixedVectorType>(Val: SourceOperand->getType())->getNumElements() !=
        NumElementsPerSource)
      return nullptr;

    // We now know the source operand referenced by this index. Make it a
    // shufflevector operand, if it isn't already.
    unsigned NumSlots = ValueToShuffleSlot.size();
    // This shuffle references more than two sources, and hence cannot be
    // represented as a shufflevector.
    if (NumSlots == 2 && !ValueToShuffleSlot.contains(Val: SourceOperand))
      return nullptr;

    auto [It, Inserted] =
        ValueToShuffleSlot.try_emplace(Key: SourceOperand, Args&: NumSlots);
    if (Inserted)
      ShuffleOperands[It->getSecond()] = SourceOperand;

    // Rebase the element index onto the slot this source occupies in the
    // two-operand shuffle (slot 0 covers [0, N), slot 1 covers [N, 2N)).
    unsigned RemappedIndex =
        (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
    Indexes[I] = RemappedIndex;
  }

  Value *Shuf = IC.Builder.CreateShuffleVector(
      V1: ShuffleOperands[0], V2: ShuffleOperands[1], Mask: ArrayRef(Indexes, NumIndexes));
  return IC.replaceInstUsesWith(I&: II, V: Shuf);
}
855
856// Returns true iff the 2 intrinsics have the same operands, limiting the
857// comparison to the first NumOperands.
858static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
859 unsigned NumOperands) {
860 assert(I.arg_size() >= NumOperands && "Not enough operands");
861 assert(E.arg_size() >= NumOperands && "Not enough operands");
862 for (unsigned i = 0; i < NumOperands; i++)
863 if (I.getArgOperand(i) != E.getArgOperand(i))
864 return false;
865 return true;
866}
867
868// Remove trivially empty start/end intrinsic ranges, i.e. a start
869// immediately followed by an end (ignoring debuginfo or other
870// start/end intrinsics in between). As this handles only the most trivial
871// cases, tracking the nesting level is not needed:
872//
873// call @llvm.foo.start(i1 0)
874// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
875// call @llvm.foo.end(i1 0)
876// call @llvm.foo.end(i1 0) ; &I
877static bool
878removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
879 std::function<bool(const IntrinsicInst &)> IsStart) {
880 // We start from the end intrinsic and scan backwards, so that InstCombine
881 // has already processed (and potentially removed) all the instructions
882 // before the end intrinsic.
883 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
884 for (; BI != BE; ++BI) {
885 if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) {
886 if (I->isDebugOrPseudoInst() ||
887 I->getIntrinsicID() == EndI.getIntrinsicID())
888 continue;
889 if (IsStart(*I)) {
890 if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
891 IC.eraseInstFromFunction(I&: *I);
892 IC.eraseInstFromFunction(I&: EndI);
893 return true;
894 }
895 // Skip start intrinsics that don't pair with this end intrinsic.
896 continue;
897 }
898 }
899 break;
900 }
901
902 return false;
903}
904
905Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
906 removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [&I](const IntrinsicInst &II) {
907 // Bail out on the case where the source va_list of a va_copy is destroyed
908 // immediately by a follow-up va_end.
909 return II.getIntrinsicID() == Intrinsic::vastart ||
910 (II.getIntrinsicID() == Intrinsic::vacopy &&
911 I.getArgOperand(i: 0) != II.getArgOperand(i: 1));
912 });
913 return nullptr;
914}
915
916static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
917 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
918 Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1);
919 if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) {
920 Call.setArgOperand(i: 0, v: Arg1);
921 Call.setArgOperand(i: 1, v: Arg0);
922 return &Call;
923 }
924 return nullptr;
925}
926
927/// Creates a result tuple for an overflow intrinsic \p II with a given
928/// \p Result and a constant \p Overflow value.
929static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
930 Constant *Overflow) {
931 Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow};
932 StructType *ST = cast<StructType>(Val: II->getType());
933 Constant *Struct = ConstantStruct::get(T: ST, V);
934 return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0);
935}
936
/// Common folds for the *.with.overflow intrinsics: first try to prove the
/// operation cannot overflow (or always overflows), then try to exploit
/// `llvm.assume(!overflow-bit)` facts to drop the overflow computation.
Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  // If value tracking can decide the overflow bit, emit the plain result plus
  // a constant overflow flag.
  if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
                            RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
    return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);

  // See whether we can optimize the overflow check with assumption information.
  for (User *U : WO->users()) {
    // Only users that extract the overflow bit (field 1) are interesting.
    if (!match(V: U, P: m_ExtractValue<1>(V: m_Value())))
      continue;

    for (auto &AssumeVH : AC.assumptionsFor(V: U)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(Val&: AssumeVH);
      // Look for assume(!overflow-bit).
      if (!match(V: I->getArgOperand(i: 0), P: m_Not(V: m_Specific(V: U))))
        continue;
      if (!isValidAssumeForContext(I, CxtI: II, /*DT=*/nullptr,
                                   /*AllowEphemerals=*/true))
        continue;
      // The assumption guarantees no overflow, so replace the intrinsic with
      // the plain binary op, tagged nsw/nuw accordingly, and a false flag.
      Value *Result =
          Builder.CreateBinOp(Opc: WO->getBinaryOp(), LHS: WO->getLHS(), RHS: WO->getRHS());
      Result->takeName(V: WO);
      if (auto *Inst = dyn_cast<Instruction>(Val: Result)) {
        if (WO->isSigned())
          Inst->setHasNoSignedWrap();
        else
          Inst->setHasNoUnsignedWrap();
      }
      return createOverflowTuple(II: WO, Result,
                                 Overflow: ConstantInt::getFalse(Ty: U->getType()));
    }
  }

  return nullptr;
}
976
977static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
978 Ty = Ty->getScalarType();
979 return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
980}
981
982static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
983 Ty = Ty->getScalarType();
984 return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero();
985}
986
/// \returns the compare predicate type if the test performed by
/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
/// floating-point environment assumed for \p F for type \p Ty
///
/// Which mask is equivalent to which predicate depends on whether input
/// denormals are preserved (IEEE) or flushed to zero (DAZ): under DAZ,
/// subnormal inputs compare equal to zero, so the subnormal class bits fold
/// into the zero side of the comparison.
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
                                              const Function &F, Type *Ty) {
  switch (static_cast<unsigned>(Mask)) {
  // x == 0.0
  case fcZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcZero | fcSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  // x >= 0.0
  case fcPositive | fcNegZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPositive | fcNegZero | fcNegSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  // x > 0.0
  case fcPosSubnormal | fcPosNormal | fcPosInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  // x <= 0.0
  case fcNegative | fcPosZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegative | fcPosZero | fcPosSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  // x < 0.0
  case fcNegSubnormal | fcNegNormal | fcNegInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case fcPosNormal | fcPosInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegNormal | fcNegInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  // x != 0.0 (ordered)
  case ~fcZero & ~fcNan:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  case ~(fcZero | fcSubnormal) & ~fcNan:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  default:
    break;
  }

  return FCmpInst::BAD_FCMP_PREDICATE;
}
1047
/// Simplify llvm.is.fpclass(x, mask): push the test through fneg/fabs,
/// convert it to an equivalent fcmp when FP exceptions don't matter, and
/// shrink the mask using known-FP-class analysis of the operand.
Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
  Value *Src0 = II.getArgOperand(i: 0);
  Value *Src1 = II.getArgOperand(i: 1);
  const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
  FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
  // IsUnordered: both qnan and snan are in the mask; IsOrdered: neither is.
  const bool IsUnordered = (Mask & fcNan) == fcNan;
  const bool IsOrdered = (Mask & fcNan) == fcNone;
  const FPClassTest OrderedMask = Mask & ~fcNan;
  const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;

  // Under strict FP we may not introduce fcmp (it can raise exceptions).
  const bool IsStrict =
      II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP);

  Value *FNegSrc;
  if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
    // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)

    II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
    return replaceOperand(I&: II, OpNum: 0, V: FNegSrc);
  }

  Value *FAbsSrc;
  if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
    // is.fpclass (fabs x), mask -> is.fpclass x, (inverse_fabs mask)
    II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
    return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc);
  }

  if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
    // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
    // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
    // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
    Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
    FCmpInst::Predicate Pred =
        IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
    if (OrderedInvertedMask == fcInf)
      Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;

    Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0);
    Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
    CmpInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: CmpInf);
  }

  if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
    // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
    // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
    // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
    Constant *Inf =
        ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
    Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
                               : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);

    EqInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: EqInf);
  }

  if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
    // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
    // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
    // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
    Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
                                            Negative: OrderedInvertedMask == fcNegInf);
    Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
                               : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
    NeInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: NeInf);
  }

  if (Mask == fcNan && !IsStrict) {
    // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
    // exceptions.
    Value *IsNan =
        Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
    IsNan->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: IsNan);
  }

  if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
    // Equivalent of !isnan. Replace with standard fcmp.
    Value *FCmp =
        Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
    FCmp->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: FCmp);
  }

  FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;

  // Try to replace with an fcmp with 0
  //
  // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
  // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
  // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
  // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
  //
  // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
  // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
  //
  // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
  // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
  //
  if (!IsStrict && (IsOrdered || IsUnordered) &&
      (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
                                     Ty: Src0->getType())) !=
          FCmpInst::BAD_FCMP_PREDICATE) {
    Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
    // Equivalent of == 0.
    Value *FCmp = Builder.CreateFCmp(
        P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
        LHS: Src0, RHS: Zero);

    FCmp->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: FCmp);
  }

  KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II);

  // Clear test bits we know must be false from the source value.
  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
  // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
  if ((Mask & Known.KnownFPClasses) != Mask) {
    II.setArgOperand(
        i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
    return &II;
  }

  // If none of the tests which can return false are possible, fold to true.
  // fp_class (nnan x), ~(qnan|snan) -> true
  // fp_class (ninf x), ~(ninf|pinf) -> true
  if (Mask == Known.KnownFPClasses)
    return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));

  return nullptr;
}
1187
1188static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1189 KnownBits Known = computeKnownBits(V: Op, Q: SQ);
1190 if (Known.isNonNegative())
1191 return false;
1192 if (Known.isNegative())
1193 return true;
1194
1195 Value *X, *Y;
1196 if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1197 return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1198
1199 return std::nullopt;
1200}
1201
1202static std::optional<bool> getKnownSignOrZero(Value *Op,
1203 const SimplifyQuery &SQ) {
1204 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1205 return Sign;
1206
1207 Value *X, *Y;
1208 if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1209 return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);
1210
1211 return std::nullopt;
1212}
1213
1214/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1215static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1216 const SimplifyQuery &SQ) {
1217 std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ);
1218 if (!Known1)
1219 return false;
1220 std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ);
1221 if (!Known0)
1222 return false;
1223 return *Known0 == *Known1;
1224}
1225
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
         "Expected a min or max intrinsic");

  // TODO: Match vectors with undef elements, but undef may not propagate.
  Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1);
  Value *X;
  const APInt *C0, *C1;
  // Require one use of the add so the transform doesn't increase instruction
  // count.
  if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) ||
      !match(V: Op1, P: m_APInt(Res&: C1)))
    return nullptr;

  // Check for necessary no-wrap and overflow constraints.
  // The add must not wrap in the signedness of the min/max, or reassociating
  // the constant would change the result.
  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
  auto *Add = cast<BinaryOperator>(Val: Op0);
  if ((IsSigned && !Add->hasNoSignedWrap()) ||
      (!IsSigned && !Add->hasNoUnsignedWrap()))
    return nullptr;

  // If the constant difference overflows, then instsimplify should reduce the
  // min/max to the add or C1.
  bool Overflow;
  APInt CDiff =
      IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow);
  assert(!Overflow && "Expected simplify of min/max");

  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
  // Note: the "mismatched" no-overflow setting does not propagate.
  Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
  Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
  return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1))
                  : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1));
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
    if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
      return nullptr;
  } else if (match(V: &MinMax1,
                   P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
    if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  // The bounds must be exactly [-2^(N-1), 2^(N-1) - 1] for some width N.
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetics?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), CxtI: AddSub) > NewBitWidth ||
      ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), CxtI: AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy);
  Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy);
  Value *Sat = Builder.CreateIntrinsic(ID: IntrinsicID, Types: NewTy, Args: {AT, BT});
  return CastInst::Create(Instruction::SExt, S: Sat, Ty);
}
1322
1323
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {
  Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
  Value *X;
  const APInt *C0, *C1;
  // The outer operand must be a constant and the inner min/max must be
  // single-use, or the select would not replace it.
  if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse())
    return nullptr;

  // In each case the inner constant must be exactly one more/less than the
  // outer constant, so the clamp collapses to just two possible values.
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
    if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_SGT;
    break;
  case Intrinsic::smin:
    if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_SLT;
    break;
  case Intrinsic::umax:
    if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_UGT;
    break;
  case Intrinsic::umin:
    if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_ULT;
    break;
  default:
    llvm_unreachable("Expected min/max intrinsic");
  }
  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
    return nullptr;

  // max (min X, 42), 41 --> X > 41 ? 42 : 41
  // min (max X, 42), 43 --> X < 43 ? 42 : 43
  Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
  return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
}
1364
/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
                                             IRBuilderBase &Builder,
                                             const SimplifyQuery &SQ) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0));
  if (!LHS)
    return nullptr;

  Constant *C0, *C1;
  if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) ||
      !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1)))
    return nullptr;

  // max (max X, C0), C1 --> max X, (max C0, C1)
  // min (min X, C0), C1 --> min X, (min C0, C1)
  // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
  // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
  // The mixed signed/unsigned forms are valid only when both constants are
  // known non-negative, since then the orderings coincide on the constants.
  Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
  if (InnerMinMaxID != MinMaxID &&
      !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
         (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
        isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ)))
    return nullptr;

  // Fold the two constants with an icmp+select so vector constants (where a
  // plain constant-fold may not apply elementwise here) are handled uniformly.
  ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID);
  Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1);
  Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1);
  return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(),
                                 Args: {LHS->getArgOperand(i: 0), NewC});
}
1397
/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  // Match and capture a min/max operand candidate.
  Value *X, *Y;
  Constant *C;
  Instruction *Inner;
  if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
                                     L: m_Instruction(I&: Inner),
                                     R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
                                 R: m_Value(V&: Y))))
    return nullptr;

  // The inner op must match. Check for constants to avoid infinite loops.
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
      match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant()))
    return nullptr;

  // max (max X, C), Y --> max (max X, Y), C
  Function *MinMax = Intrinsic::getOrInsertDeclaration(M: II->getModule(),
                                                       id: MinMaxID, Tys: II->getType());
  Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
  NewInner->takeName(V: Inner);
  return CallInst::Create(Func: MinMax, Args: {NewInner, C});
}
1427
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0));
  auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1));
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  // At least one inner op must be single-use, or the transform would add an
  // instruction instead of removing one.
  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
      RHS->getIntrinsicID() != MinMaxID ||
      (!LHS->hasOneUse() && !RHS->hasOneUse()))
    return nullptr;

  Value *A = LHS->getArgOperand(i: 0);
  Value *B = LHS->getArgOperand(i: 1);
  Value *C = RHS->getArgOperand(i: 0);
  Value *D = RHS->getArgOperand(i: 1);

  // Look for a common operand.
  Value *MinMaxOp = nullptr;
  Value *ThirdOp = nullptr;
  if (LHS->hasOneUse()) {
    // If the LHS is only used in this chain and the RHS is used outside of it,
    // reuse the RHS min/max because that will eliminate the LHS.
    if (D == A || C == A) {
      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
      MinMaxOp = RHS;
      ThirdOp = B;
    } else if (D == B || C == B) {
      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
      MinMaxOp = RHS;
      ThirdOp = A;
    }
  } else {
    assert(RHS->hasOneUse() && "Expected one-use operand");
    // Reuse the LHS. This will eliminate the RHS.
    if (D == A || D == B) {
      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
      MinMaxOp = LHS;
      ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  }

  if (!MinMaxOp || !ThirdOp)
    return nullptr;

  Module *Mod = II->getModule();
  Function *MinMax =
      Intrinsic::getOrInsertDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
  return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
}
1485
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
Instruction *
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
  if (!II->getType()->isVectorTy() ||
      !isTriviallyVectorizable(ID: II->getIntrinsicID()) ||
      !II->getCalledFunction()->isSpeculatable())
    return nullptr;

  Value *X;
  Constant *C;
  ArrayRef<int> Mask;
  // Find the first argument that is neither a constant nor a scalar operand;
  // it must be a unary shuffle, which fixes the mask all other args must use.
  auto *NonConstArg = find_if_not(Range: II->args(), P: [&II](Use &Arg) {
    return isa<Constant>(Val: Arg.get()) ||
           isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                              ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr);
  });
  if (!NonConstArg ||
      !match(V: NonConstArg, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask(Mask))))
    return nullptr;

  // At least 1 operand must be a shuffle with 1 use because we are creating 2
  // instructions.
  if (none_of(Range: II->args(), P: match_fn(P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(), v2: m_Value())))))
    return nullptr;

  // See if all arguments are shuffled with the same mask.
  SmallVector<Value *, 4> NewArgs;
  Type *SrcTy = X->getType();
  for (Use &Arg : II->args()) {
    // Scalar operands pass through unchanged.
    if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                           ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg,
                   P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
             X->getType() == SrcTy)
      NewArgs.push_back(Elt: X);
    else if (match(V: &Arg, P: m_ImmConstant(C))) {
      // If it's a constant, try find the constant that would be shuffled to C.
      if (Constant *ShuffledC =
              unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: SrcTy)))
        NewArgs.push_back(Elt: ShuffledC);
      else
        return nullptr;
    } else
      return nullptr;
  }

  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  // Propagate fast-math flags from the original call when it is an FP op.
  Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
  // Result type might be a different vector width.
  // TODO: Check that the result type isn't widened?
  VectorType *ResTy =
      VectorType::get(ElementType: II->getType()->getScalarType(), Other: cast<VectorType>(Val: SrcTy));
  Value *NewIntrinsic =
      Builder.CreateIntrinsic(RetTy: ResTy, ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
  return new ShuffleVectorInst(NewIntrinsic, Mask);
}
1544
1545/// If all arguments of the intrinsic are reverses, try to pull the reverse
1546/// after the intrinsic.
Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
  // Only lane-wise (trivially vectorizable) intrinsics commute with a
  // reverse of their vector operands.
  if (!isTriviallyVectorizable(ID: II->getIntrinsicID()))
    return nullptr;

  // At least 1 operand must be a reverse with 1 use because we are creating 2
  // instructions.
  if (none_of(Range: II->args(), P: [](Value *V) {
        return match(V, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value())));
      }))
    return nullptr;

  // Rebuild the argument list with each reverse peeled off. Scalar operands
  // and splats are unaffected by a lane reverse; immediate constants are
  // reversed up front so the net effect is unchanged.
  Value *X;
  Constant *C;
  SmallVector<Value *> NewArgs;
  for (Use &Arg : II->args()) {
    if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                           ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg, P: m_VecReverse(Op0: m_Value(V&: X))))
      NewArgs.push_back(Elt: X);
    else if (isSplatValue(V: Arg))
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg, P: m_ImmConstant(C)))
      NewArgs.push_back(Elt: Builder.CreateVectorReverse(V: C));
    else
      return nullptr;
  }

  // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
  // Preserve fast-math flags when the original call is an FP operation.
  Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
      RetTy: II->getType(), ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
  return Builder.CreateVectorReverse(V: NewIntrinsic);
}
1581
1582/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1583/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1584/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
template <Intrinsic::ID IntrID>
static Instruction *foldBitOrderCrossLogicOp(Value *V,
                                             InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
                "This helper only supports BSWAP and BITREVERSE intrinsics");

  Value *X, *Y;
  // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
  // don't match ConstantExpr that aren't meaningful for this transform.
  if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
      isa<BinaryOperator>(Val: V)) {
    Value *OldReorderX, *OldReorderY;
    BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();

    // If both X and Y are bswap/bitreverse, the transform reduces the number
    // of instructions even if there's multiuse.
    // If only one operand is bswap/bitreverse, we need to ensure the operand
    // have only one use.
    if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
        match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
      // bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y)
      return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
    }

    // bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y)); the new
    // reorder is applied to the other operand.
    if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
      return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
    }

    // Symmetric case: the existing reorder is on the RHS of the logic op.
    if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
      return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
    }
  }
  return nullptr;
}
1620
1621/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1622/// `f(f(x, y), y) == f(x, y)` holds.
1623static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
1624 switch (IID) {
1625 case Intrinsic::smax:
1626 case Intrinsic::smin:
1627 case Intrinsic::umax:
1628 case Intrinsic::umin:
1629 case Intrinsic::maximum:
1630 case Intrinsic::minimum:
1631 case Intrinsic::maximumnum:
1632 case Intrinsic::minimumnum:
1633 case Intrinsic::maxnum:
1634 case Intrinsic::minnum:
1635 return true;
1636 default:
1637 return false;
1638 }
1639}
1640
1641/// Attempt to simplify value-accumulating recurrences of kind:
1642/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1643/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1644/// And let the idempotent binary intrinsic be hoisted, when the operands are
1645/// known to be loop-invariant.
static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
                                                      IntrinsicInst *II) {
  PHINode *PN;
  Value *Init, *OtherOp;

  // A binary intrinsic recurrence with loop-invariant operands is equivalent to
  // `call @llvm.binary.intrinsic(Init, OtherOp)`.
  // OtherOp must dominate the phi, i.e. be loop-invariant, for the single
  // hoisted call to be equivalent to the whole recurrence.
  auto IID = II->getIntrinsicID();
  if (!isIdempotentBinaryIntrinsic(IID) ||
      !matchSimpleBinaryIntrinsicRecurrence(I: II, P&: PN, Init, OtherOp) ||
      !IC.getDominatorTree().dominates(Def: OtherOp, User: PN))
    return nullptr;

  // Preserve fast-math flags when the hoisted intrinsic is an FP min/max.
  auto *InvariantBinaryInst =
      IC.Builder.CreateBinaryIntrinsic(ID: IID, LHS: Init, RHS: OtherOp);
  if (isa<FPMathOperator>(Val: InvariantBinaryInst))
    cast<Instruction>(Val: InvariantBinaryInst)->copyFastMathFlags(I: II);
  return InvariantBinaryInst;
}
1665
1666static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1667 if (!CanReorderLanes)
1668 return nullptr;
1669
1670 Value *V;
1671 if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V))))
1672 return V;
1673
1674 ArrayRef<int> Mask;
1675 if (!isa<FixedVectorType>(Val: Arg->getType()) ||
1676 !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) ||
1677 !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource())
1678 return nullptr;
1679
1680 int Sz = Mask.size();
1681 SmallBitVector UsedIndices(Sz);
1682 for (int Idx : Mask) {
1683 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1684 return nullptr;
1685 UsedIndices.set(Idx);
1686 }
1687
1688 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1689 // other changes.
1690 return UsedIndices.all() ? V : nullptr;
1691}
1692
1693/// Fold an unsigned minimum of trailing or leading zero bits counts:
1694/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1695/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1696/// >> ConstOp))
1697/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1698/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
                                          const DataLayout &DL,
                                          InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
                "This helper only supports cttz and ctlz intrinsics");

  // I0 must be a one-use cttz/ctlz call; capture its source and its
  // zero-is-undef flag.
  Value *CtOp1, *CtOp2;
  Value *ZeroUndef1, *ZeroUndef2;
  if (!match(I0, m_OneUse(
                     m_Intrinsic<IntrID>(m_Value(V&: CtOp1), m_Value(V&: ZeroUndef1)))))
    return nullptr;

  // umin(cttz(x), cttz(y)) --> cttz(x | y) (likewise for ctlz). The
  // zero-is-undef flags are OR'ed for the combined call.
  if (match(I1,
            m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp2), m_Value(V&: ZeroUndef2)))))
    return Builder.CreateBinaryIntrinsic(
        ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: CtOp2),
        RHS: Builder.CreateOr(LHS: ZeroUndef1, RHS: ZeroUndef2));

  // Otherwise I1 must be a constant (or per-lane constants) strictly below
  // the bit width for the clamp-by-OR trick to apply.
  unsigned BitWidth = I1->getType()->getScalarSizeInBits();
  auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
  if (!match(I1, m_CheckedInt(LessBitWidth)))
    // We have a constant >= BitWidth (which can be handled by CVP)
    // or a non-splat vector with elements < and >= BitWidth
    return nullptr;

  // Build the bit that clamps the count: for cttz set bit C (1 << C); for
  // ctlz set the bit C positions below the top (SignedMin >> C).
  Type *Ty = I1->getType();
  Constant *NewConst = ConstantFoldBinaryOpOperands(
      Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
      LHS: IntrID == Intrinsic::cttz
          ? ConstantInt::get(Ty, V: 1)
          : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)),
      RHS: cast<Constant>(Val: I1), DL);
  // The OR guarantees a set bit, so zero-is-undef can safely be true.
  return Builder.CreateBinaryIntrinsic(
      ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: NewConst),
      RHS: ConstantInt::getTrue(Ty: ZeroUndef1->getType()));
}
1737
1738/// Return whether "X LOp (Y ROp Z)" is always equal to
1739/// "(X LOp Y) ROp (X LOp Z)".
1740static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
1741 bool HasNSW, Intrinsic::ID ROp) {
1742 switch (ROp) {
1743 case Intrinsic::umax:
1744 case Intrinsic::umin:
1745 if (HasNUW && LOp == Instruction::Add)
1746 return true;
1747 if (HasNUW && LOp == Instruction::Shl)
1748 return true;
1749 return false;
1750 case Intrinsic::smax:
1751 case Intrinsic::smin:
1752 return HasNSW && LOp == Instruction::Add;
1753 default:
1754 return false;
1755 }
1756}
1757
1758// Attempts to factorise a common term
1759// in an instruction that has the form "(A op' B) op (C op' D)
1760// where op is an intrinsic and op' is a binop
static Value *
foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
                                   InstCombiner::BuilderTy &Builder) {
  Value *LHS = II->getOperand(i_nocapture: 0), *RHS = II->getOperand(i_nocapture: 1);
  Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();

  // Both operands must be overflowing binops of the same opcode, each with a
  // single use, so that factoring does not duplicate work.
  OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(Val: LHS);
  OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(Val: RHS);

  if (!Op0 || !Op1)
    return nullptr;

  if (Op0->getOpcode() != Op1->getOpcode())
    return nullptr;

  if (!Op0->hasOneUse() || !Op1->hasOneUse())
    return nullptr;

  // Wrap flags survive only when present on both inner operations.
  Instruction::BinaryOps InnerOpcode =
      static_cast<Instruction::BinaryOps>(Op0->getOpcode());
  bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
  bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();

  if (!leftDistributesOverRight(LOp: InnerOpcode, HasNUW, HasNSW, ROp: TopLevelOpcode))
    return nullptr;

  Value *A = Op0->getOperand(i_nocapture: 0);
  Value *B = Op0->getOperand(i_nocapture: 1);
  Value *C = Op1->getOperand(i_nocapture: 0);
  Value *D = Op1->getOperand(i_nocapture: 1);

  // Attempts to swap variables such that A equals C or B equals D,
  // if the inner operation is commutative.
  if (Op0->isCommutative() && A != C && B != D) {
    if (A == D || B == C)
      std::swap(a&: C, b&: D);
    else
      return nullptr;
  }

  // Factor out the common term:
  //   (A op' B) op (A op' D) --> A op' (B op D)
  //   (A op' B) op (C op' B) --> (A op C) op' B
  BinaryOperator *NewBinop;
  if (A == C) {
    Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: B, RHS: D);
    NewBinop =
        cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: NewIntrinsic));
  } else if (B == D) {
    Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: A, RHS: C);
    NewBinop =
        cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: NewIntrinsic, RHS: B));
  } else {
    return nullptr;
  }

  // Re-apply the wrap flags that were common to both inner operations.
  NewBinop->setHasNoUnsignedWrap(HasNUW);
  NewBinop->setHasNoSignedWrap(HasNSW);

  return NewBinop;
}
1819
// Fold a NEON vector-shift intrinsic with a constant shift amount into a
// plain IR shift: all-non-negative amounts become shl; all-negative amounts
// become ashr/lshr of the negated amount, depending on the intrinsic's
// signedness. A mix of signs in the lanes cannot be folded.
static Instruction *foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC) {
  Value *Arg0 = II->getArgOperand(i: 0);
  auto *ShiftConst = dyn_cast<Constant>(Val: II->getArgOperand(i: 1));
  if (!ShiftConst)
    return nullptr;

  int ElemBits = Arg0->getType()->getScalarSizeInBits();
  // Track whether every lane seen so far is non-negative / negative.
  bool AllPositive = true;
  bool AllNegative = true;

  // Validate one lane: the amount must be a ConstantInt in the open range
  // (-ElemBits, ElemBits) and share the sign of all previous lanes.
  auto Check = [&](Constant *C) -> bool {
    if (auto *CI = dyn_cast_or_null<ConstantInt>(Val: C)) {
      const APInt &V = CI->getValue();
      if (V.isNonNegative()) {
        AllNegative = false;
        return AllPositive && V.ult(RHS: ElemBits);
      }
      AllPositive = false;
      return AllNegative && V.sgt(RHS: -ElemBits);
    }
    return false;
  };

  // Check every lane of a fixed vector, or the single scalar amount.
  if (auto *VTy = dyn_cast<FixedVectorType>(Val: Arg0->getType())) {
    for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
      if (!Check(ShiftConst->getAggregateElement(Elt: I)))
        return nullptr;
    }

  } else if (!Check(ShiftConst))
    return nullptr;

  IRBuilderBase &B = IC.Builder;
  if (AllPositive)
    return IC.replaceInstUsesWith(I&: *II, V: B.CreateShl(LHS: Arg0, RHS: ShiftConst));

  // Negative amounts shift right by the negated amount; signed intrinsics
  // use an arithmetic shift, unsigned ones a logical shift.
  Value *NegAmt = B.CreateNeg(V: ShiftConst);
  Intrinsic::ID IID = II->getIntrinsicID();
  const bool IsSigned =
      IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
  Value *Result =
      IsSigned ? B.CreateAShr(LHS: Arg0, RHS: NegAmt) : B.CreateLShr(LHS: Arg0, RHS: NegAmt);
  return IC.replaceInstUsesWith(I&: *II, V: Result);
}
1864
1865/// CallInst simplification. This mostly only handles folding of intrinsic
1866/// instructions. For normal calls, it allows visitCallBase to do the heavy
1867/// lifting.
1868Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1869 // Don't try to simplify calls without uses. It will not do anything useful,
1870 // but will result in the following folds being skipped.
1871 if (!CI.use_empty()) {
1872 SmallVector<Value *, 8> Args(CI.args());
1873 if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args,
1874 Q: SQ.getWithInstruction(I: &CI)))
1875 return replaceInstUsesWith(I&: CI, V);
1876 }
1877
1878 if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI))
1879 return visitFree(FI&: CI, FreedOp);
1880
1881 // If the caller function (i.e. us, the function that contains this CallInst)
1882 // is nounwind, mark the call as nounwind, even if the callee isn't.
1883 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1884 CI.setDoesNotThrow();
1885 return &CI;
1886 }
1887
1888 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI);
1889 if (!II)
1890 return visitCallBase(Call&: CI);
1891
1892 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1893 // instead of in visitCallBase.
1894 if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) {
1895 if (auto NumBytes = MI->getLengthInBytes()) {
1896 // memmove/cpy/set of zero bytes is a noop.
1897 if (NumBytes->isZero())
1898 return eraseInstFromFunction(I&: CI);
1899
1900 // For atomic unordered mem intrinsics if len is not a positive or
1901 // not a multiple of element size then behavior is undefined.
1902 if (MI->isAtomic() &&
1903 (NumBytes->isNegative() ||
1904 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1905 CreateNonTerminatorUnreachable(InsertAt: MI);
1906 assert(MI->getType()->isVoidTy() &&
1907 "non void atomic unordered mem intrinsic");
1908 return eraseInstFromFunction(I&: *MI);
1909 }
1910 }
1911
1912 // No other transformations apply to volatile transfers.
1913 if (MI->isVolatile())
1914 return nullptr;
1915
1916 if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1917 // memmove(x,x,size) -> noop.
1918 if (MTI->getSource() == MTI->getDest())
1919 return eraseInstFromFunction(I&: CI);
1920 }
1921
1922 auto IsPointerUndefined = [MI](Value *Ptr) {
1923 return isa<ConstantPointerNull>(Val: Ptr) &&
1924 !NullPointerIsDefined(
1925 F: MI->getFunction(),
1926 AS: cast<PointerType>(Val: Ptr->getType())->getAddressSpace());
1927 };
1928 bool SrcIsUndefined = false;
1929 // If we can determine a pointer alignment that is bigger than currently
1930 // set, update the alignment.
1931 if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1932 if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI))
1933 return I;
1934 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1935 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) {
1936 if (Instruction *I = SimplifyAnyMemSet(MI: MSI))
1937 return I;
1938 }
1939
1940 // If src/dest is null, this memory intrinsic must be a noop.
1941 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1942 Builder.CreateAssumption(Cond: Builder.CreateIsNull(Arg: MI->getLength()));
1943 return eraseInstFromFunction(I&: CI);
1944 }
1945
1946 // If we have a memmove and the source operation is a constant global,
1947 // then the source and dest pointers can't alias, so we can change this
1948 // into a call to memcpy.
1949 if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) {
1950 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource()))
1951 if (GVSrc->isConstant()) {
1952 Module *M = CI.getModule();
1953 Intrinsic::ID MemCpyID =
1954 MMI->isAtomic()
1955 ? Intrinsic::memcpy_element_unordered_atomic
1956 : Intrinsic::memcpy;
1957 Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(),
1958 CI.getArgOperand(i: 1)->getType(),
1959 CI.getArgOperand(i: 2)->getType() };
1960 CI.setCalledFunction(
1961 Intrinsic::getOrInsertDeclaration(M, id: MemCpyID, Tys));
1962 return II;
1963 }
1964 }
1965 }
1966
1967 // For fixed width vector result intrinsics, use the generic demanded vector
1968 // support.
1969 if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) {
1970 auto VWidth = IIFVTy->getNumElements();
1971 APInt PoisonElts(VWidth, 0);
1972 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1973 if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) {
1974 if (V != II)
1975 return replaceInstUsesWith(I&: *II, V);
1976 return II;
1977 }
1978 }
1979
1980 if (II->isCommutative()) {
1981 if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) {
1982 replaceOperand(I&: *II, OpNum: 0, V: Pair->first);
1983 replaceOperand(I&: *II, OpNum: 1, V: Pair->second);
1984 return II;
1985 }
1986
1987 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI))
1988 return NewCall;
1989 }
1990
1991 // Unused constrained FP intrinsic calls may have declared side effect, which
1992 // prevents it from being removed. In some cases however the side effect is
1993 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1994 // returns a replacement, the call may be removed.
1995 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) {
1996 if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI)))
1997 return eraseInstFromFunction(I&: CI);
1998 }
1999
2000 Intrinsic::ID IID = II->getIntrinsicID();
2001 switch (IID) {
2002 case Intrinsic::objectsize: {
2003 SmallVector<Instruction *> InsertedInstructions;
2004 if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false,
2005 InsertedInstructions: &InsertedInstructions)) {
2006 for (Instruction *Inserted : InsertedInstructions)
2007 Worklist.add(I: Inserted);
2008 return replaceInstUsesWith(I&: CI, V);
2009 }
2010 return nullptr;
2011 }
2012 case Intrinsic::abs: {
2013 Value *IIOperand = II->getArgOperand(i: 0);
2014 bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue();
2015
2016 // abs(-x) -> abs(x)
2017 Value *X;
2018 if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) {
2019 if (cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2020 replaceOperand(I&: *II, OpNum: 1, V: Builder.getTrue());
2021 return replaceOperand(I&: *II, OpNum: 0, V: X);
2022 }
2023 if (match(V: IIOperand, P: m_c_Select(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
2024 return replaceOperand(I&: *II, OpNum: 0, V: X);
2025
2026 Value *Y;
2027 // abs(a * abs(b)) -> abs(a * b)
2028 if (match(V: IIOperand,
2029 P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X),
2030 R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) {
2031 bool NSW =
2032 cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2033 auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y);
2034 return replaceOperand(I&: *II, OpNum: 0, V: XY);
2035 }
2036
2037 if (std::optional<bool> Known =
2038 getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) {
2039 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2040 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2041 if (!*Known)
2042 return replaceInstUsesWith(I&: *II, V: IIOperand);
2043
2044 // abs(x) -> -x if x < 0
2045 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2046 if (IntMinIsPoison)
2047 return BinaryOperator::CreateNSWNeg(Op: IIOperand);
2048 return BinaryOperator::CreateNeg(Op: IIOperand);
2049 }
2050
2051 // abs (sext X) --> zext (abs X*)
2052 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2053 if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
2054 Value *NarrowAbs =
2055 Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse());
2056 return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType());
2057 }
2058
2059 // Match a complicated way to check if a number is odd/even:
2060 // abs (srem X, 2) --> and X, 1
2061 const APInt *C;
2062 if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2)
2063 return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1));
2064
2065 break;
2066 }
2067 case Intrinsic::umin: {
2068 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2069 // umin(x, 1) == zext(x != 0)
2070 if (match(V: I1, P: m_One())) {
2071 assert(II->getType()->getScalarSizeInBits() != 1 &&
2072 "Expected simplify of umin with max constant");
2073 Value *Zero = Constant::getNullValue(Ty: I0->getType());
2074 Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero);
2075 return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType());
2076 }
2077 // umin(cttz(x), const) --> cttz(x | (1 << const))
2078 if (Value *FoldedCttz =
2079 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
2080 I0, I1, DL, Builder))
2081 return replaceInstUsesWith(I&: *II, V: FoldedCttz);
2082 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2083 if (Value *FoldedCtlz =
2084 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
2085 I0, I1, DL, Builder))
2086 return replaceInstUsesWith(I&: *II, V: FoldedCtlz);
2087 [[fallthrough]];
2088 }
2089 case Intrinsic::umax: {
2090 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2091 Value *X, *Y;
2092 if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) &&
2093 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2094 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2095 return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2096 }
2097 Constant *C;
2098 if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2099 I0->hasOneUse()) {
2100 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, DestTy: X->getType(), DL)) {
2101 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2102 return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
2103 }
2104 }
2105 // If C is not 0:
2106 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2107 // If C is not 0 or 1:
2108 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2109 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2110 const APInt *C;
2111 Value *X;
2112 if (!match(V: A, P: m_NUWShl(L: m_Value(V&: X), R: m_APInt(Res&: C))) &&
2113 !(match(V: A, P: m_NUWMul(L: m_Value(V&: X), R: m_APInt(Res&: C))) && !C->isOne()))
2114 return nullptr;
2115 if (C->isZero())
2116 return nullptr;
2117 if (!match(V: B, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: X), R: m_One()))))
2118 return nullptr;
2119
2120 Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: 0));
2121 Value *NewSelect = nullptr;
2122 NewSelect = Builder.CreateSelectWithUnknownProfile(
2123 C: Cmp, True: ConstantInt::get(Ty: X->getType(), V: 1), False: A, DEBUG_TYPE);
2124 return replaceInstUsesWith(I&: *II, V: NewSelect);
2125 };
2126
2127 if (IID == Intrinsic::umax) {
2128 if (Instruction *I = foldMaxMulShift(I0, I1))
2129 return I;
2130 if (Instruction *I = foldMaxMulShift(I1, I0))
2131 return I;
2132 }
2133
2134 // If both operands of unsigned min/max are sign-extended, it is still ok
2135 // to narrow the operation.
2136 [[fallthrough]];
2137 }
2138 case Intrinsic::smax:
2139 case Intrinsic::smin: {
2140 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2141 Value *X, *Y;
2142 if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) &&
2143 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2144 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
2145 return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2146 }
2147
2148 Constant *C;
2149 if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
2150 I0->hasOneUse()) {
2151 if (Constant *NarrowC = getLosslessSignedTrunc(C, DestTy: X->getType(), DL)) {
2152 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
2153 return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
2154 }
2155 }
2156
2157 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2158 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2159 const APInt *MinC, *MaxC;
2160 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2161 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2162 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2163 Value *NewMax = Builder.CreateBinaryIntrinsic(
2164 ID: MaxIID, LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: *MaxC));
2165 return replaceInstUsesWith(
2166 I&: *II, V: Builder.CreateBinaryIntrinsic(
2167 ID: MinIID, LHS: NewMax, RHS: ConstantInt::get(Ty: X->getType(), V: *MinC)));
2168 };
2169 if (IID == Intrinsic::smax &&
2170 match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::smin>(Op0: m_Value(V&: X),
2171 Op1: m_APInt(Res&: MinC)))) &&
2172 match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->sgt(RHS: *MaxC))
2173 return CreateCanonicalClampForm(true);
2174 if (IID == Intrinsic::umax &&
2175 match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::umin>(Op0: m_Value(V&: X),
2176 Op1: m_APInt(Res&: MinC)))) &&
2177 match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->ugt(RHS: *MaxC))
2178 return CreateCanonicalClampForm(false);
2179
2180 // umin(i1 X, i1 Y) -> and i1 X, Y
2181 // smax(i1 X, i1 Y) -> and i1 X, Y
2182 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2183 II->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2184 return BinaryOperator::CreateAnd(V1: I0, V2: I1);
2185 }
2186
2187 // umax(i1 X, i1 Y) -> or i1 X, Y
2188 // smin(i1 X, i1 Y) -> or i1 X, Y
2189 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2190 II->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2191 return BinaryOperator::CreateOr(V1: I0, V2: I1);
2192 }
2193
2194 // smin(smax(X, -1), 1) -> scmp(X, 0)
2195 // smax(smin(X, 1), -1) -> scmp(X, 0)
2196 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2197 // And i1's have been changed to and/ors
2198 // So we only need to check for smin
2199 if (IID == Intrinsic::smin) {
2200 if (match(V: I0, P: m_OneUse(SubPattern: m_SMax(L: m_Value(V&: X), R: m_AllOnes()))) &&
2201 match(V: I1, P: m_One())) {
2202 Value *Zero = ConstantInt::get(Ty: X->getType(), V: 0);
2203 return replaceInstUsesWith(
2204 I&: CI,
2205 V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {X, Zero}));
2206 }
2207 }
2208
2209 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2210 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2211 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2212 // TODO: Canonicalize neg after min/max if I1 is constant.
2213 if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) &&
2214 (I0->hasOneUse() || I1->hasOneUse())) {
2215 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2216 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y);
2217 return BinaryOperator::CreateNSWNeg(Op: InvMaxMin);
2218 }
2219 }
2220
2221 // (umax X, (xor X, Pow2))
2222 // -> (or X, Pow2)
2223 // (umin X, (xor X, Pow2))
2224 // -> (and X, ~Pow2)
2225 // (smax X, (xor X, Pos_Pow2))
2226 // -> (or X, Pos_Pow2)
2227 // (smin X, (xor X, Pos_Pow2))
2228 // -> (and X, ~Pos_Pow2)
2229 // (smax X, (xor X, Neg_Pow2))
2230 // -> (and X, ~Neg_Pow2)
2231 // (smin X, (xor X, Neg_Pow2))
2232 // -> (or X, Neg_Pow2)
2233 if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) ||
2234 match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) &&
2235 isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) {
2236 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2237 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2238
2239 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2240 auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II));
2241 if (KnownSign == std::nullopt) {
2242 UseOr = false;
2243 UseAndN = false;
2244 } else if (*KnownSign /* true is Signed. */) {
2245 UseOr ^= true;
2246 UseAndN ^= true;
2247 Type *Ty = I0->getType();
2248 // Negative power of 2 must be IntMin. It's possible to be able to
2249 // prove negative / power of 2 without actually having known bits, so
2250 // just get the value by hand.
2251 X = Constant::getIntegerValue(
2252 Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits()));
2253 }
2254 }
2255 if (UseOr)
2256 return BinaryOperator::CreateOr(V1: I0, V2: X);
2257 else if (UseAndN)
2258 return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X));
2259 }
2260
2261 // If we can eliminate ~A and Y is free to invert:
2262 // max ~A, Y --> ~(min A, ~Y)
2263 //
2264 // Examples:
2265 // max ~A, ~Y --> ~(min A, Y)
2266 // max ~A, C --> ~(min A, ~C)
2267 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2268 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2269 Value *A;
2270 if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
2271 !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) {
2272 if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) {
2273 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
2274 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY);
2275 return BinaryOperator::CreateNot(Op: InvMaxMin);
2276 }
2277 }
2278 return nullptr;
2279 };
2280
2281 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2282 return I;
2283 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2284 return I;
2285
2286 if (Instruction *I = moveAddAfterMinMax(II, Builder))
2287 return I;
2288
2289 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2290 const APInt *RHSC;
2291 if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) &&
2292 match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC)))))
2293 return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y),
2294 V2: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2295
2296 // smax(X, -X) --> abs(X)
2297 // smin(X, -X) --> -abs(X)
2298 // umax(X, -X) --> -abs(X)
2299 // umin(X, -X) --> abs(X)
2300 if (isKnownNegation(X: I0, Y: I1)) {
2301 // We can choose either operand as the input to abs(), but if we can
2302 // eliminate the only use of a value, that's better for subsequent
2303 // transforms/analysis.
2304 if (I0->hasOneUse() && !I1->hasOneUse())
2305 std::swap(a&: I0, b&: I1);
2306
2307 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2308 // operation and potentially its negation.
2309 bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true);
2310 Value *Abs = Builder.CreateBinaryIntrinsic(
2311 ID: Intrinsic::abs, LHS: I0,
2312 RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison));
2313
2314 // We don't have a "nabs" intrinsic, so negate if needed based on the
2315 // max/min operation.
2316 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2317 Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
2318 return replaceInstUsesWith(I&: CI, V: Abs);
2319 }
2320
2321 if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
2322 return Sel;
2323
2324 if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II))
2325 return SAdd;
2326
2327 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2328 return replaceInstUsesWith(I&: *II, V: NewMinMax);
2329
2330 if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
2331 return R;
2332
2333 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2334 return NewMinMax;
2335
2336 // Try to fold minmax with constant RHS based on range information
2337 if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) {
2338 ICmpInst::Predicate Pred =
2339 ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID));
2340 bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID);
2341 ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
2342 V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II));
2343 if (!LHS_CR.isFullSet()) {
2344 if (LHS_CR.icmp(Pred, Other: *RHSC))
2345 return replaceInstUsesWith(I&: *II, V: I0);
2346 if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC))
2347 return replaceInstUsesWith(I&: *II,
2348 V: ConstantInt::get(Ty: II->getType(), V: *RHSC));
2349 }
2350 }
2351
2352 if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
2353 return replaceInstUsesWith(I&: *II, V);
2354
2355 break;
2356 }
2357 case Intrinsic::scmp: {
// scmp(sub nsw X, Y, 0) --> scmp(X, Y): with nsw, the sign of X - Y is
// exactly the three-way ordering of X versus Y, so compare directly.
2358 Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
2359 Value *LHS, *RHS;
2360 if (match(V: I0, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS))) && match(V: I1, P: m_Zero()))
2361 return replaceInstUsesWith(
2362 I&: CI,
2363 V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {LHS, RHS}));
2364 break;
2365 }
2366 case Intrinsic::bitreverse: {
2367 Value *IIOperand = II->getArgOperand(i: 0);
2368 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2369 Value *X;
2370 if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) &&
2371 X->getType()->isIntOrIntVectorTy(BitWidth: 1)) {
2372 Type *Ty = II->getType();
// A zext'd i1 contributes only bit 0; reversing moves it to the sign bit.
2373 APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits());
2374 return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit),
2375 S2: ConstantInt::getNullValue(Ty));
2376 }
2377
// Fold bitreverse wrapped around logic ops via the shared helper.
2378 if (Instruction *crossLogicOpFold =
2379 foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder))
2380 return crossLogicOpFold;
2381
2382 break;
2383 }
2384 case Intrinsic::bswap: {
2385 Value *IIOperand = II->getArgOperand(i: 0);
2386
2387 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2388 // inverse-shift-of-bswap:
2389 // bswap (shl X, Y) --> lshr (bswap X), Y
2390 // bswap (lshr X, Y) --> shl (bswap X), Y
2391 Value *X, *Y;
2392 if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
2393 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
// Low 3 bits of Y known zero means the shift amount is a multiple of 8,
// i.e. a whole-byte shift, which commutes with bswap as the inverse shift.
2394 if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) {
2395 Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X);
2396 BinaryOperator::BinaryOps InverseShift =
2397 cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl
2398 ? Instruction::LShr
2399 : Instruction::Shl;
2400 return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y);
2401 }
2402 }
2403
// Round the known leading/trailing zero counts down to whole bytes:
// bswap permutes bytes, so only byte-granular zero runs are useful here.
2404 KnownBits Known = computeKnownBits(V: IIOperand, CxtI: II);
2405 uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8);
2406 uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8);
2407 unsigned BW = Known.getBitWidth();
2408
2409 // bswap(x) -> shift(x) if x has exactly one "active byte"
2410 if (BW - LZ - TZ == 8) {
2411 assert(LZ != TZ && "active byte cannot be in the middle");
2412 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2413 return BinaryOperator::CreateNUWShl(
2414 V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ));
2415 // -> lshr(x) if the "active byte" is in the high part of x
2416 return BinaryOperator::CreateExactLShr(
2417 V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ));
2418 }
2419
2420 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2421 if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) {
// c is the width difference: the double-bswap leaves the kept bytes in
// the high part of x, so shift them down before truncating.
2422 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2423 Value *CV = ConstantInt::get(Ty: X->getType(), V: C);
2424 Value *V = Builder.CreateLShr(LHS: X, RHS: CV);
2425 return new TruncInst(V, IIOperand->getType());
2426 }
2427
2428 if (Instruction *crossLogicOpFold =
2429 foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) {
2430 return crossLogicOpFold;
2431 }
2432
2433 // Try to fold into bitreverse if bswap is the root of the expression tree.
2434 if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false,
2435 /*MatchBitReversals*/ true))
2436 return BitOp;
2437 break;
2438 }
// Masked memory intrinsics: delegate entirely to the dedicated simplifiers.
2439 case Intrinsic::masked_load:
2440 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II))
2441 return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp);
2442 break;
2443 case Intrinsic::masked_store:
2444 return simplifyMaskedStore(II&: *II);
2445 case Intrinsic::masked_gather:
2446 return simplifyMaskedGather(II&: *II);
2447 case Intrinsic::masked_scatter:
2448 return simplifyMaskedScatter(II&: *II);
2449 case Intrinsic::launder_invariant_group:
2450 case Intrinsic::strip_invariant_group:
2451 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this))
2452 return replaceInstUsesWith(I&: *II, V: SkippedBarrier);
2453 break;
2454 case Intrinsic::powi:
2455 if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) {
2456 // 0 and 1 are handled in instsimplify
2457 // powi(x, -1) -> 1/x
2458 if (Power->isMinusOne())
2459 return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0),
2460 V2: II->getArgOperand(i: 0), FMFSource: II);
2461 // powi(x, 2) -> x*x
2462 if (Power->equalsInt(V: 2))
2463 return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0),
2464 V2: II->getArgOperand(i: 0), FMFSource: II);
2465
// getValue()[0] is the exponent's low bit; clear means the power is even,
// so the sign of the base cannot affect the result.
2466 if (!Power->getValue()[0]) {
2467 Value *X;
2468 // If power is even:
2469 // powi(-x, p) -> powi(x, p)
2470 // powi(fabs(x), p) -> powi(x, p)
2471 // powi(copysign(x, y), p) -> powi(x, p)
2472 if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) ||
2473 match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) ||
2474 match(V: II->getArgOperand(i: 0),
2475 P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value())))
2476 return replaceOperand(I&: *II, OpNum: 0, V: X);
2477 }
2478 }
2479 break;
2480
// Bit-counting intrinsics are folded by shared helpers elsewhere in this file.
2481 case Intrinsic::cttz:
2482 case Intrinsic::ctlz:
2483 if (auto *I = foldCttzCtlz(II&: *II, IC&: *this))
2484 return I;
2485 break;
2486
2487 case Intrinsic::ctpop:
2488 if (auto *I = foldCtpop(II&: *II, IC&: *this))
2489 return I;
2490 break;
2491
2492 case Intrinsic::fshl:
2493 case Intrinsic::fshr: {
2494 Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1);
2495 Type *Ty = II->getType();
2496 unsigned BitWidth = Ty->getScalarSizeInBits();
2497 Constant *ShAmtC;
2498 if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) {
2499 // Canonicalize a shift amount constant operand to modulo the bit-width.
2500 Constant *WidthC = ConstantInt::get(Ty, V: BitWidth);
2501 Constant *ModuloC =
2502 ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL);
// Constant folding the urem can fail; bail out rather than proceed with an
// unreduced shift amount.
2503 if (!ModuloC)
2504 return nullptr;
2505 if (ModuloC != ShAmtC)
2506 return replaceOperand(I&: *II, OpNum: 2, V: ModuloC);
2507
// From here on the shift amount is known to be strictly less than BitWidth.
2508 assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC,
2509 ShAmtC, DL),
2510 m_One()) &&
2511 "Shift amount expected to be modulo bitwidth");
2512
2513 // Canonicalize funnel shift right by constant to funnel shift left. This
2514 // is not entirely arbitrary. For historical reasons, the backend may
2515 // recognize rotate left patterns but miss rotate right patterns.
2516 if (IID == Intrinsic::fshr) {
2517 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2518 if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II)))
2519 return nullptr;
2520
2521 Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC);
2522 Module *Mod = II->getModule();
2523 Function *Fshl =
2524 Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty);
2525 return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC });
2526 }
2527 assert(IID == Intrinsic::fshl &&
2528 "All funnel shifts by simple constants should go left");
2529
2530 // fshl(X, 0, C) --> shl X, C
2531 // fshl(X, undef, C) --> shl X, C
2532 if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef()))
2533 return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC);
2534
2535 // fshl(0, X, C) --> lshr X, (BW-C)
2536 // fshl(undef, X, C) --> lshr X, (BW-C)
2537 if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef()))
2538 return BinaryOperator::CreateLShr(V1: Op1,
2539 V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC));
2540
2541 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2542 if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) {
2543 Module *Mod = II->getModule();
2544 Function *Bswap =
2545 Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty);
2546 return CallInst::Create(Func: Bswap, Args: { Op0 });
2547 }
2548 if (Instruction *BitOp =
2549 matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true,
2550 /*MatchBitReversals*/ true))
2551 return BitOp;
2552
2553 // R = fshl(X, X, C2)
2554 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
// Op0 == Op1 means both are rotates, so the shift amounts simply add
// modulo the bit width; a zero sum folds the pair away entirely.
2555 Value *InnerOp;
2556 const APInt *ShAmtInnerC, *ShAmtOuterC;
2557 if (match(V: Op0, P: m_FShl(Op0: m_Value(V&: InnerOp), Op1: m_Deferred(V: InnerOp),
2558 Op2: m_APInt(Res&: ShAmtInnerC))) &&
2559 match(V: ShAmtC, P: m_APInt(Res&: ShAmtOuterC)) && Op0 == Op1) {
2560 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2561 APInt Modulo = Sum.urem(RHS: APInt(Sum.getBitWidth(), BitWidth));
2562 if (Modulo.isZero())
2563 return replaceInstUsesWith(I&: *II, V: InnerOp);
2564 Constant *ModuloC = ConstantInt::get(Ty, V: Modulo);
2565 return CallInst::Create(Func: cast<IntrinsicInst>(Val: Op0)->getCalledFunction(),
2566 Args: {InnerOp, InnerOp, ModuloC});
2567 }
2568 }
2569
2570 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2571 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2572 // if BitWidth is a power-of-2
2573 Value *Y;
2574 if (Op0 == Op1 && isPowerOf2_32(Value: BitWidth) &&
2575 match(V: II->getArgOperand(i: 2), P: m_Neg(V: m_Value(V&: Y)))) {
2576 Module *Mod = II->getModule();
2577 Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2578 M: Mod, id: IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Tys: Ty);
2579 return CallInst::Create(Func: OppositeShift, Args: {Op0, Op1, Y});
2580 }
2581
2582 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2583 // power-of-2
2584 if (IID == Intrinsic::fshl && isPowerOf2_32(Value: BitWidth) &&
2585 match(V: Op1, P: m_ZeroInt())) {
2586 Value *Op2 = II->getArgOperand(i: 2);
2587 Value *And = Builder.CreateAnd(LHS: Op2, RHS: ConstantInt::get(Ty, V: BitWidth - 1));
2588 return BinaryOperator::CreateShl(V1: Op0, V2: And);
2589 }
2590
2591 // Left or right might be masked.
2592 if (SimplifyDemandedInstructionBits(Inst&: *II))
2593 return &CI;
2594
2595 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2596 // so only the low bits of the shift amount are demanded if the bitwidth is
2597 // a power-of-2.
2598 if (!isPowerOf2_32(Value: BitWidth))
2599 break;
2600 APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth));
2601 KnownBits Op2Known(BitWidth);
2602 if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known))
2603 return &CI;
2604 break;
2605 }
2606 case Intrinsic::ptrmask: {
2607 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2608 KnownBits Known(BitWidth);
2609 if (SimplifyDemandedInstructionBits(Inst&: *II, Known))
2610 return II;
2611
2612 Value *InnerPtr, *InnerMask;
// Tracks whether any in-place mutation below warrants returning &CI.
2613 bool Changed = false;
2614 // Combine:
2615 // (ptrmask (ptrmask p, A), B)
2616 // -> (ptrmask p, (and A, B))
2617 if (match(V: II->getArgOperand(i: 0),
2618 P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr),
2619 Op1: m_Value(V&: InnerMask))))) {
2620 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2621 "Mask types must match");
2622 // TODO: If InnerMask == Op1, we could copy attributes from inner
2623 // callsite -> outer callsite.
2624 Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask);
2625 replaceOperand(I&: CI, OpNum: 0, V: InnerPtr);
2626 replaceOperand(I&: CI, OpNum: 1, V: NewMask);
2627 Changed = true;
2628 }
2629
2630 // See if we can deduce non-null.
2631 if (!CI.hasRetAttr(Kind: Attribute::NonNull) &&
2632 (Known.isNonZero() ||
2633 isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) {
2634 CI.addRetAttr(Kind: Attribute::NonNull);
2635 Changed = true;
2636 }
2637
// The result's minimum alignment is 2^(known trailing zero bits), clamped
// to both the maximum representable exponent and BitWidth - 1.
2638 unsigned NewAlignmentLog =
2639 std::min(a: Value::MaxAlignmentExponent,
2640 b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros()));
2641 // Known bits will capture if we had alignment information associated with
2642 // the pointer argument.
2643 if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) {
2644 CI.addRetAttr(Attr: Attribute::getWithAlignment(
2645 Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog)));
2646 Changed = true;
2647 }
2648 if (Changed)
2649 return &CI;
2650 break;
2651 }
2652 case Intrinsic::uadd_with_overflow:
2653 case Intrinsic::sadd_with_overflow: {
2654 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2655 return I;
2656
2657 // Given 2 constant operands whose sum does not overflow:
2658 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2659 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2660 Value *X;
2661 const APInt *C0, *C1;
2662 Value *Arg0 = II->getArgOperand(i: 0);
2663 Value *Arg1 = II->getArgOperand(i: 1);
2664 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
// The inner add must carry the matching no-wrap flag so that merging the
// two constants cannot change the overflow result.
2665 bool HasNWAdd = IsSigned
2666 ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2667 : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2668 if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) {
2669 bool Overflow;
2670 APInt NewC =
2671 IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow);
2672 if (!Overflow)
2673 return replaceInstUsesWith(
2674 I&: *II, V: Builder.CreateBinaryIntrinsic(
2675 ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC)));
2676 }
2677 break;
2678 }
2679
2680 case Intrinsic::umul_with_overflow:
2681 case Intrinsic::smul_with_overflow:
2682 case Intrinsic::usub_with_overflow:
2683 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2684 return I;
2685 break;
2686
2687 case Intrinsic::ssub_with_overflow: {
2688 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2689 return I;
2690
2691 Constant *C;
2692 Value *Arg0 = II->getArgOperand(i: 0);
2693 Value *Arg1 = II->getArgOperand(i: 1);
2694 // Given a constant C that is not the minimum signed value
2695 // for an integer of a given bit width:
2696 //
2697 // ssubo X, C -> saddo X, -C
2698 if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
2699 Value *NegVal = ConstantExpr::getNeg(C);
2700 // Build a saddo call that is equivalent to the discovered
2701 // ssubo call.
2702 return replaceInstUsesWith(
2703 I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow,
2704 LHS: Arg0, RHS: NegVal));
2705 }
2706
2707 break;
2708 }
2709
2710 case Intrinsic::uadd_sat:
2711 case Intrinsic::sadd_sat:
2712 case Intrinsic::usub_sat:
2713 case Intrinsic::ssub_sat: {
2714 SaturatingInst *SI = cast<SaturatingInst>(Val: II);
2715 Type *Ty = SI->getType();
2716 Value *Arg0 = SI->getLHS();
2717 Value *Arg1 = SI->getRHS();
2718
2719 // Make use of known overflow information.
// If the op provably never saturates, lower to a plain no-wrap binop; if
// it provably always saturates, fold to the saturation constant directly.
2720 OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
2721 LHS: Arg0, RHS: Arg1, CxtI: SI);
2722 switch (OR) {
2723 case OverflowResult::MayOverflow:
2724 break;
2725 case OverflowResult::NeverOverflows:
2726 if (SI->isSigned())
2727 return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2728 else
2729 return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2730 case OverflowResult::AlwaysOverflowsLow: {
2731 unsigned BitWidth = Ty->getScalarSizeInBits();
2732 APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2733 return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
2734 }
2735 case OverflowResult::AlwaysOverflowsHigh: {
2736 unsigned BitWidth = Ty->getScalarSizeInBits();
2737 APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2738 return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
2739 }
2740 }
2741
2742 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2743 // which after that:
2744 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2745 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2746 Constant *C, *C1;
2747 Value *A;
2748 if (IID == Intrinsic::usub_sat &&
2749 match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) &&
2750 match(V: Arg1, P: m_ImmConstant(C&: C1))) {
2751 auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1);
2752 auto *NewSub =
2753 Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A);
2754 return replaceInstUsesWith(I&: *SI, V: NewSub);
2755 }
2756
2757 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2758 if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) &&
2759 C->isNotMinSignedValue()) {
2760 Value *NegVal = ConstantExpr::getNeg(C);
2761 return replaceInstUsesWith(
2762 I&: *II, V: Builder.CreateBinaryIntrinsic(
2763 ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal));
2764 }
2765
2766 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2767 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2768 // if Val and Val2 have the same sign
2769 if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2770 Value *X;
2771 const APInt *Val, *Val2;
2772 APInt NewVal;
2773 bool IsUnsigned =
2774 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2775 if (Other->getIntrinsicID() == IID &&
2776 match(V: Arg1, P: m_APInt(Res&: Val)) &&
2777 match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) &&
2778 match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) {
2779 if (IsUnsigned)
// Unsigned constants saturate the same way the ops do, so uadd_sat of
// the constants is always a safe merge.
2780 NewVal = Val->uadd_sat(RHS: *Val2);
2781 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2782 bool Overflow;
2783 NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
2784 if (Overflow) {
2785 // Both adds together may add more than SignedMaxValue
2786 // without saturating the final result.
2787 break;
2788 }
2789 } else {
2790 // Cannot fold saturated addition with different signs.
2791 break;
2792 }
2793
2794 return replaceInstUsesWith(
2795 I&: *II, V: Builder.CreateBinaryIntrinsic(
2796 ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
2797 }
2798 }
2799 break;
2800 }
2801
2802 case Intrinsic::minnum:
2803 case Intrinsic::maxnum:
2804 case Intrinsic::minimumnum:
2805 case Intrinsic::maximumnum:
2806 case Intrinsic::minimum:
2807 case Intrinsic::maximum: {
2808 Value *Arg0 = II->getArgOperand(i: 0);
2809 Value *Arg1 = II->getArgOperand(i: 1);
2810 Value *X, *Y;
2811 if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
2812 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2813 // If both operands are negated, invert the call and negate the result:
2814 // min(-X, -Y) --> -(max(X, Y))
2815 // max(-X, -Y) --> -(min(X, Y))
2816 Intrinsic::ID NewIID;
2817 switch (IID) {
2818 case Intrinsic::maxnum:
2819 NewIID = Intrinsic::minnum;
2820 break;
2821 case Intrinsic::minnum:
2822 NewIID = Intrinsic::maxnum;
2823 break;
2824 case Intrinsic::maximumnum:
2825 NewIID = Intrinsic::minimumnum;
2826 break;
2827 case Intrinsic::minimumnum:
2828 NewIID = Intrinsic::maximumnum;
2829 break;
2830 case Intrinsic::maximum:
2831 NewIID = Intrinsic::minimum;
2832 break;
2833 case Intrinsic::minimum:
2834 NewIID = Intrinsic::maximum;
2835 break;
2836 default:
2837 llvm_unreachable("unexpected intrinsic ID");
2838 }
2839 Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
2840 Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
2841 FNeg->copyIRFlags(V: II);
2842 return FNeg;
2843 }
2844
2845 // m(m(X, C2), C1) -> m(X, C)
// Reassociate nested same-kind min/max with constants: fold C1 and C2
// with the matching APFloat min/max helper, accepting the constant in
// either operand position of the inner call.
2846 const APFloat *C1, *C2;
2847 if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2848 if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
2849 ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) &&
2850 match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) ||
2851 (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) &&
2852 match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) {
2853 APFloat Res(0.0);
2854 switch (IID) {
2855 case Intrinsic::maxnum:
2856 Res = maxnum(A: *C1, B: *C2);
2857 break;
2858 case Intrinsic::minnum:
2859 Res = minnum(A: *C1, B: *C2);
2860 break;
2861 case Intrinsic::maximumnum:
2862 Res = maximumnum(A: *C1, B: *C2);
2863 break;
2864 case Intrinsic::minimumnum:
2865 Res = minimumnum(A: *C1, B: *C2);
2866 break;
2867 case Intrinsic::maximum:
2868 Res = maximum(A: *C1, B: *C2);
2869 break;
2870 case Intrinsic::minimum:
2871 Res = minimum(A: *C1, B: *C2);
2872 break;
2873 default:
2874 llvm_unreachable("unexpected intrinsic ID");
2875 }
2876 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2877 // was a simplification (so Arg0 and its original flags could
2878 // propagate?)
2879 Value *V = Builder.CreateBinaryIntrinsic(
2880 ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res),
2881 FMFSource: FMFSource::intersect(A: II, B: M));
2882 return replaceInstUsesWith(I&: *II, V);
2883 }
2884 }
2885
2886 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2887 if (match(V: Arg0, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Arg1, P: m_FPExt(Op: m_Value(V&: Y))) &&
2888 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2889 X->getType() == Y->getType()) {
2890 Value *NewCall =
2891 Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
2892 return new FPExtInst(NewCall, II->getType());
2893 }
2894
2895 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2896 Constant *C;
2897 if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
2898 match(V: Arg1, P: m_ImmConstant(C))) {
2899 if (Constant *TruncC =
2900 getLosslessInvCast(C, InvCastTo: X->getType(), CastOp: Instruction::FPExt, DL)) {
2901 Value *NewCall =
2902 Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: TruncC, FMFSource: II, Name: II->getName());
2903 return new FPExtInst(NewCall, II->getType());
2904 }
2905 }
2906
2907 // max X, -X --> fabs X
2908 // min X, -X --> -(fabs X)
2909 // TODO: Remove one-use limitation? That is obviously better for max,
2910 // hence why we don't check for one-use for that. However,
2911 // it would be an extra instruction for min (fnabs), but
2912 // that is still likely better for analysis and codegen.
2913 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2914 if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X)))
2915 return Op0->hasOneUse() ||
2916 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2917 IID != Intrinsic::minimumnum);
2918 return false;
2919 };
2920
2921 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2922 Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
2923 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2924 IID == Intrinsic::minimumnum)
2925 R = Builder.CreateFNegFMF(V: R, FMFSource: II);
2926 return replaceInstUsesWith(I&: *II, V: R);
2927 }
2928
2929 break;
2930 }
2931 case Intrinsic::matrix_multiply: {
2932 // Optimize negation in matrix multiplication.
2933
2934 // -A * -B -> A * B
2935 Value *A, *B;
2936 if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) &&
2937 match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) {
2938 replaceOperand(I&: *II, OpNum: 0, V: A);
2939 replaceOperand(I&: *II, OpNum: 1, V: B);
2940 return II;
2941 }
2942
// Exactly one operand is negated from here on; record which one so the
// element-count comparisons below can decide where the fneg is cheapest.
2943 Value *Op0 = II->getOperand(i_nocapture: 0);
2944 Value *Op1 = II->getOperand(i_nocapture: 1);
2945 Value *OpNotNeg, *NegatedOp;
2946 unsigned NegatedOpArg, OtherOpArg;
2947 if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2948 NegatedOp = Op0;
2949 NegatedOpArg = 0;
2950 OtherOpArg = 1;
2951 } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2952 NegatedOp = Op1;
2953 NegatedOpArg = 1;
2954 OtherOpArg = 0;
2955 } else
2956 // Multiplication doesn't have a negated operand.
2957 break;
2958
2959 // Only optimize if the negated operand has only one use.
2960 if (!NegatedOp->hasOneUse())
2961 break;
2962
2963 Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
2964 VectorType *RetTy = cast<VectorType>(Val: II->getType());
2965 VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
2966 VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
2967 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2968 ElementCount OtherCount = OtherOpTy->getElementCount();
2969 ElementCount RetCount = RetTy->getElementCount();
2970 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2971 if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
2972 ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
2973 Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
2974 replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
2975 replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
2976 return II;
2977 }
2978 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2979 if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
2980 SmallVector<Value *, 5> NewArgs(II->args());
2981 NewArgs[NegatedOpArg] = OpNotNeg;
2982 Instruction *NewMul =
2983 Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
2984 return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
2985 }
2986 break;
2987 }
2988 case Intrinsic::fmuladd: {
2989 // Try to simplify the underlying FMul.
// Note: fmuladd may be lowered as a separately-rounded fmul+fadd, so the
// full (rounding-assuming) FMul simplifier is legal here; the fma case
// below must use the stricter simplifyFMAFMul instead.
2990 if (Value *V =
2991 simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1),
2992 FMF: II->getFastMathFlags(), Q: SQ.getWithInstruction(I: II)))
2993 return BinaryOperator::CreateFAddFMF(V1: V, V2: II->getArgOperand(i: 2),
2994 FMF: II->getFastMathFlags());
2995
2996 [[fallthrough]];
2997 }
2998 case Intrinsic::fma: {
2999 // fma fneg(x), fneg(y), z -> fma x, y, z
3000 Value *Src0 = II->getArgOperand(i: 0);
3001 Value *Src1 = II->getArgOperand(i: 1);
3002 Value *Src2 = II->getArgOperand(i: 2);
3003 Value *X, *Y;
3004 if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
3005 replaceOperand(I&: *II, OpNum: 0, V: X);
3006 replaceOperand(I&: *II, OpNum: 1, V: Y);
3007 return II;
3008 }
3009
3010 // fma fabs(x), fabs(x), z -> fma x, x, z
3011 if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
3012 match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
3013 replaceOperand(I&: *II, OpNum: 0, V: X);
3014 replaceOperand(I&: *II, OpNum: 1, V: X);
3015 return II;
3016 }
3017
3018 // Try to simplify the underlying FMul. We can only apply simplifications
3019 // that do not require rounding.
3020 if (Value *V = simplifyFMAFMul(LHS: Src0, RHS: Src1, FMF: II->getFastMathFlags(),
3021 Q: SQ.getWithInstruction(I: II)))
3022 return BinaryOperator::CreateFAddFMF(V1: V, V2: Src2, FMF: II->getFastMathFlags());
3023
3024 // fma x, y, 0 -> fmul x, y
3025 // This is always valid for -0.0, but requires nsz for +0.0 as
3026 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3027 if (match(V: Src2, P: m_NegZeroFP()) ||
3028 (match(V: Src2, P: m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3029 return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II)
3030
3031 // fma x, -1.0, y -> fsub y, x
3032 if (match(V: Src1, P: m_SpecificFP(V: -1.0)))
3033 return BinaryOperator::CreateFSubFMF(V1: Src2, V2: Src0, FMFSource: II);
3034
3035 break;
3036 }
3037 case Intrinsic::copysign: {
3038 Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1);
// If the sign bit of the Sign operand is statically known, the copysign
// collapses to fabs (positive) or fneg-of-fabs (negative).
3039 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3040 V: Sign, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
3041 if (*KnownSignBit) {
3042 // If we know that the sign argument is negative, reduce to FNABS:
3043 // copysign Mag, -Sign --> fneg (fabs Mag)
3044 Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3045 return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
3046 }
3047
3048 // If we know that the sign argument is positive, reduce to FABS:
3049 // copysign Mag, +Sign --> fabs Mag
3050 Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
3051 return replaceInstUsesWith(I&: *II, V: Fabs);
3052 }
3053
3054 // Propagate sign argument through nested calls:
3055 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3056 Value *X;
3057 if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) {
3058 Value *CopySign =
3059 Builder.CreateCopySign(LHS: Mag, RHS: X, FMFSource: FMFSource::intersect(A: II, B: Sign));
3060 return replaceInstUsesWith(I&: *II, V: CopySign);
3061 }
3062
3063 // Clear sign-bit of constant magnitude:
3064 // copysign -MagC, X --> copysign MagC, X
3065 // TODO: Support constant folding for fabs
3066 const APFloat *MagC;
3067 if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) {
3068 APFloat PosMagC = *MagC;
3069 PosMagC.clearSign();
3070 return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC));
3071 }
3072
3073 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3074 // copysign (fabs X), Sign --> copysign X, Sign
3075 // copysign (fneg X), Sign --> copysign X, Sign
3076 if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
3077 return replaceOperand(I&: *II, OpNum: 0, V: X);
3078
3079 Type *SignEltTy = Sign->getType()->getScalarType();
3080
// If Sign is a bitcast from an integer, only the integer's sign-mask bit
// is demanded; let demanded-bits simplify the source. Requires the FP
// format to keep its sign bit in the MSB.
3081 Value *CastSrc;
3082 if (match(V: Sign,
3083 P: m_OneUse(SubPattern: m_ElementWiseBitCast(Op: m_OneUse(SubPattern: m_Value(V&: CastSrc))))) &&
3084 CastSrc->getType()->isIntOrIntVectorTy() &&
3085 APFloat::hasSignBitInMSB(SignEltTy->getFltSemantics())) {
3086 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3087 if (SimplifyDemandedBits(I: cast<Instruction>(Val: Sign), Op: 0,
3088 DemandedMask: APInt::getSignMask(BitWidth: Known.getBitWidth()), Known,
3089 Q: SQ))
3090 return II;
3091 }
3092
3093 break;
3094 }
3095 case Intrinsic::fabs: {
3096 Value *Cond, *TVal, *FVal;
3097 Value *Arg = II->getArgOperand(i: 0);
3098 Value *X;
3099 // fabs (-X) --> fabs (X)
3100 if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) {
3101 CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
3102 return replaceInstUsesWith(I&: CI, V: Fabs);
3103 }
3104
3105 if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
3106 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
// With one use, a single constant arm is enough (no instruction-count
// increase); otherwise both arms must be constants.
3107 if (Arg->hasOneUse() ? (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal))
3108 : (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal))) {
3109 CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
3110 CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
3111 SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
// Merge flags from the fabs and the select, but drop nsz from the
// select's flags since fabs canonicalizes the sign.
3112 FastMathFlags FMF1 = II->getFastMathFlags();
3113 FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags();
3114 FMF2.setNoSignedZeros(false);
3115 SI->setFastMathFlags(FMF1 | FMF2);
3116 return SI;
3117 }
3118 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3119 if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
3120 return replaceOperand(I&: *II, OpNum: 0, V: FVal);
3121 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3122 if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
3123 return replaceOperand(I&: *II, OpNum: 0, V: TVal);
3124 }
3125
3126 Value *Magnitude, *Sign;
3127 if (match(V: II->getArgOperand(i: 0),
3128 P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
3129 // fabs (copysign x, y) -> (fabs x)
3130 CallInst *AbsSign =
3131 Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Magnitude, FMFSource: II);
3132 return replaceInstUsesWith(I&: *II, V: AbsSign);
3133 }
3134
3135 [[fallthrough]];
3136 }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
      // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
      // Rounding a value that was just extended gives the same result in the
      // narrower type; one-use check avoids duplicating the extension.
      Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
      return new FPExtInst(NarrowII, II->getType());
    }
    break;
  }
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::cosh: {
    // These are even functions: the sign of the input does not affect the
    // result, so strip operations that only change the sign bit.
    Value *X, *Sign;
    Value *Src = II->getArgOperand(0);
    if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
        match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
      // f(-x) --> f(x)
      // f(fabs(x)) --> f(x)
      // f(copysign(x, y)) --> f(x)
      // for f in {cos, cosh}
      return replaceOperand(*II, 0, X);
    }
    break;
  }
  case Intrinsic::sin:
  case Intrinsic::amdgcn_sin:
  case Intrinsic::sinh:
  case Intrinsic::tan:
  case Intrinsic::tanh: {
    // These are odd functions, so the negation can be hoisted out of the call.
    // One-use check: otherwise we'd keep the fneg alive and add another one.
    Value *X;
    if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
      // f(-x) --> -f(x)
      // for f in {sin, sinh, tan, tanh}
      Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
      return UnaryOperator::CreateFNegFMF(NewFunc, II);
    }
    break;
  }
  case Intrinsic::ldexp: {
    // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
    //
    // The danger is if the first ldexp would overflow to infinity or underflow
    // to zero, but the combined exponent avoids it. We ignore this with
    // reassoc.
    //
    // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
    // it would just double down on the overflow/underflow which would occur
    // anyway.
    //
    // TODO: Could do better if we had range tracking for the input value
    // exponent. Also could broaden sign check to cover == 0 case.
    Value *Src = II->getArgOperand(0);
    Value *Exp = II->getArgOperand(1);

    uint64_t ConstExp;
    if (match(Exp, m_ConstantInt(ConstExp))) {
      // ldexp(x, K) -> fmul x, 2^K
      const fltSemantics &FPTy =
          Src->getType()->getScalarType()->getFltSemantics();

      // Build the exact power-of-two constant; the truncating cast preserves
      // negative exponents stored as zero-extended APInt values.
      APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
                              APFloat::rmNearestTiesToEven);
      if (!Scaled.isZero() && !Scaled.isInfinity()) {
        // Skip overflow and underflow cases.
        Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
        return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
      }
    }

    Value *InnerSrc;
    Value *InnerExp;
    if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
                       m_Value(InnerSrc), m_Value(InnerExp)))) &&
        Exp->getType() == InnerExp->getType()) {
      FastMathFlags FMF = II->getFastMathFlags();
      FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();

      if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
          signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
        // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
        // width.
        Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
        // Mutate this call in place: combined exponent, inner source.
        II->setArgOperand(1, NewExp);
        II->setFastMathFlags(InnerFlags); // Or the inner flags.
        return replaceOperand(*II, 0, InnerSrc);
      }
    }

    // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
    // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
    Value *ExtSrc;
    if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
        ExtSrc->getType()->getScalarSizeInBits() == 1) {
      // zext i1 yields exponent 0 or 1, i.e. scale by 1.0 or 2.0.
      Value *Select =
          Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
                               ConstantFP::get(II->getType(), 1.0));
      return BinaryOperator::CreateFMulFMF(Src, Select, II);
    }
    if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
        ExtSrc->getType()->getScalarSizeInBits() == 1) {
      // sext i1 yields exponent 0 or -1, i.e. scale by 1.0 or 0.5.
      Value *Select =
          Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
                               ConstantFP::get(II->getType(), 1.0));
      return BinaryOperator::CreateFMulFMF(Src, Select, II);
    }

    // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
    // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
    ///
    // TODO: If we cared, should insert a canonicalize for x
    Value *SelectCond, *SelectLHS, *SelectRHS;
    if (match(II->getArgOperand(1),
              m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
                                m_Value(SelectRHS))))) {
      Value *NewLdexp = nullptr;
      Value *Select = nullptr;
      if (match(SelectRHS, m_ZeroInt())) {
        NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
        Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
      } else if (match(SelectLHS, m_ZeroInt())) {
        NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
        Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
      }

      if (NewLdexp) {
        Select->takeName(II);
        return replaceInstUsesWith(*II, Select);
      }
    }

    break;
  }
  case Intrinsic::ptrauth_auth:
  case Intrinsic::ptrauth_resign: {
    // We don't support this optimization on intrinsic calls with deactivation
    // symbols, which are represented using operand bundles.
    if (II->hasOperandBundles())
      break;

    // (sign|resign) + (auth|resign) can be folded by omitting the middle
    // sign+auth component if the key and discriminator match.
    bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
    Value *Ptr = II->getArgOperand(0);
    Value *Key = II->getArgOperand(1);
    Value *Disc = II->getArgOperand(2);

    // AuthKey will be the key we need to end up authenticating against in
    // whatever we replace this sequence with.
    Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
    if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
      // We don't support this optimization on intrinsic calls with deactivation
      // symbols, which are represented using operand bundles.
      if (CI->hasOperandBundles())
        break;

      BasePtr = CI->getArgOperand(0);
      if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
        // Inner sign must use the same key/discriminator this op checks.
        if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
          break;
      } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
        // For resign, args 3/4 are the re-signing key/discriminator; args 1/2
        // are what the inner op authenticated against.
        if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
          break;
        AuthKey = CI->getArgOperand(1);
        AuthDisc = CI->getArgOperand(2);
      } else
        break;
    } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
      // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
      // our purposes, so check for that too.
      const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
      if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
        break;

      // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
      if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
        auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
        auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
        auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
        auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
                                            SignDisc, /*AddrDisc=*/Null,
                                            /*DeactivationSymbol=*/Null);
        replaceInstUsesWith(
            *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
        return eraseInstFromFunction(*II);
      }

      // auth(ptrauth(p,k,d),k,d) -> p
      BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
    } else
      break;

    // Pick the replacement intrinsic based on which halves of the fold remain.
    unsigned NewIntrin;
    if (AuthKey && NeedSign) {
      // resign(0,1) + resign(1,2) = resign(0, 2)
      NewIntrin = Intrinsic::ptrauth_resign;
    } else if (AuthKey) {
      // resign(0,1) + auth(1) = auth(0)
      NewIntrin = Intrinsic::ptrauth_auth;
    } else if (NeedSign) {
      // sign(0) + resign(0, 1) = sign(1)
      NewIntrin = Intrinsic::ptrauth_sign;
    } else {
      // sign(0) + auth(0) = nop
      replaceInstUsesWith(*II, BasePtr);
      return eraseInstFromFunction(*II);
    }

    SmallVector<Value *, 4> CallArgs;
    CallArgs.push_back(BasePtr);
    if (AuthKey) {
      CallArgs.push_back(AuthKey);
      CallArgs.push_back(AuthDisc);
    }

    if (NeedSign) {
      CallArgs.push_back(II->getArgOperand(3));
      CallArgs.push_back(II->getArgOperand(4));
    }

    Function *NewFn =
        Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
    return CallInst::Create(NewFn, CallArgs);
  }
  // NEON table lookup (tbl) and table-lookup-with-extension (tbx) variants all
  // funnel into one helper; only the extension flag differs.
  case Intrinsic::arm_neon_vtbl1:
  case Intrinsic::arm_neon_vtbl2:
  case Intrinsic::arm_neon_vtbl3:
  case Intrinsic::arm_neon_vtbl4:
  case Intrinsic::aarch64_neon_tbl1:
  case Intrinsic::aarch64_neon_tbl2:
  case Intrinsic::aarch64_neon_tbl3:
  case Intrinsic::aarch64_neon_tbl4:
    return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
  case Intrinsic::arm_neon_vtbx1:
  case Intrinsic::arm_neon_vtbx2:
  case Intrinsic::arm_neon_vtbx3:
  case Intrinsic::arm_neon_vtbx4:
  case Intrinsic::aarch64_neon_tbx1:
  case Intrinsic::aarch64_neon_tbx2:
  case Intrinsic::aarch64_neon_tbx3:
  case Intrinsic::aarch64_neon_tbx4:
    return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3384
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    // Widening vector multiplies: fold the trivial constant cases.
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
                 IID == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        // Both constant: widen each per signedness and multiply directly.
        Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
        Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
        return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
  case Intrinsic::arm_neon_aesd:
  case Intrinsic::arm_neon_aese:
  case Intrinsic::aarch64_crypto_aesd:
  case Intrinsic::aarch64_crypto_aese:
  case Intrinsic::aarch64_sve_aesd:
  case Intrinsic::aarch64_sve_aese: {
    Value *DataArg = II->getArgOperand(0);
    Value *KeyArg = II->getArgOperand(1);

    // Accept zero on either operand.
    if (!match(KeyArg, m_ZeroInt()))
      std::swap(KeyArg, DataArg);

    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR:
    // aes(data ^ key, 0) --> aes(data, key)
    Value *Data, *Key;
    if (match(KeyArg, m_ZeroInt()) &&
        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
      replaceOperand(*II, 0, Data);
      replaceOperand(*II, 1, Key);
      return II;
    }
    break;
  }
  // NEON variable-shift intrinsics share one simplification helper.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    return foldNeonShift(II, *this);
  case Intrinsic::hexagon_V6_vandvrt:
  case Intrinsic::hexagon_V6_vandvrt_128B: {
    // Simplify Q -> V -> Q conversion: vandvrt(vandqrt(Q, Bytes), Mask) --> Q.
    if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      Intrinsic::ID ID0 = Op0->getIntrinsicID();
      if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
          ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
        break;
      Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
      // The round trip is lossless only if each byte lane of the two scalar
      // masks is guaranteed to share at least one set bit.
      uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
      uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
      // Check if every byte has common bits in Bytes and Mask.
      uint64_t C = Bytes1 & Mask1;
      if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
        return replaceInstUsesWith(*II, Op0->getArgOperand(0));
    }
    break;
  }
  case Intrinsic::stackrestore: {
    // Classifies an instruction by whether it can invalidate the removal of a
    // stackrestore: allocas and side-effecting calls block it; an intervening
    // stackrestore needs further inspection.
    enum class ClassifyResult {
      None,
      Alloca,
      StackRestore,
      CallWithSideEffects,
    };
    auto Classify = [](const Instruction *I) {
      if (isa<AllocaInst>(I))
        return ClassifyResult::Alloca;

      if (auto *CI = dyn_cast<CallInst>(I)) {
        if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return ClassifyResult::StackRestore;

          if (II->mayHaveSideEffects())
            return ClassifyResult::CallWithSideEffects;
        } else {
          // Consider all non-intrinsic calls to be side effects
          return ClassifyResult::CallWithSideEffects;
        }
      }

      return ClassifyResult::None;
    };

    // If the stacksave and the stackrestore are in the same BB, and there is
    // no intervening call, alloca, or stackrestore of a different stacksave,
    // remove the restore. This can happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave &&
          SS->getParent() == II->getParent()) {
        BasicBlock::iterator BI(SS);
        bool CannotRemove = false;
        // Walk forward from the stacksave to this stackrestore.
        for (++BI; &*BI != II; ++BI) {
          switch (Classify(&*BI)) {
          case ClassifyResult::None:
            // So far so good, look at next instructions.
            break;

          case ClassifyResult::StackRestore:
            // If we found an intervening stackrestore for a different
            // stacksave, we can't remove the stackrestore. Otherwise, continue.
            if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
              CannotRemove = true;
            break;

          case ClassifyResult::Alloca:
          case ClassifyResult::CallWithSideEffects:
            // If we found an alloca, a non-intrinsic call, or an intrinsic
            // call with side effects, we can't remove the stackrestore.
            CannotRemove = true;
            break;
          }
          if (CannotRemove)
            break;
        }

        if (!CannotRemove)
          return eraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI(II);
    Instruction *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      switch (Classify(&*BI)) {
      case ClassifyResult::None:
        // So far so good, look at next instructions.
        break;

      case ClassifyResult::StackRestore:
        // If there is a stackrestore below this one, remove this one.
        return eraseInstFromFunction(CI);

      case ClassifyResult::Alloca:
      case ClassifyResult::CallWithSideEffects:
        // If we found an alloca, a non-intrinsic call, or an intrinsic call
        // with side effects (such as llvm.stacksave and llvm.read_register),
        // we can't remove the stack restore.
        CannotRemove = true;
        break;
      }
      if (CannotRemove)
        break;
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return eraseInstFromFunction(CI);
    break;
  }
  case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
        II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
      break;

    // Drop a lifetime.start/lifetime.end pair that encloses nothing of
    // interest (see removeTriviallyEmptyRange); the predicate identifies the
    // matching start intrinsic.
    if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
          return I.getIntrinsicID() == Intrinsic::lifetime_start;
        }))
      return nullptr;
    break;
  case Intrinsic::assume: {
    Value *IIOperand = II->getArgOperand(0);
    SmallVector<OperandBundleDef, 4> OpBundles;
    II->getOperandBundlesAsDefs(OpBundles);

    /// This will remove the boolean Condition from the assume given as
    /// argument and remove the assume if it becomes useless.
    /// always returns nullptr for use as a return values.
    auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
      assert(isa<AssumeInst>(Assume));
      // NOTE(review): this inspects and rewrites II, not the Assume parameter;
      // callers pass either II itself or an adjacent identical assume —
      // confirm that operating on II is intended in the latter case.
      if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
        return eraseInstFromFunction(CI);
      replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
      return nullptr;
    };
    // Remove an assume if it is followed by an identical assume.
    // TODO: Do we need this? Unless there are conflicting assumptions, the
    // computeKnownBits(IIOperand) below here eliminates redundant assumes.
    Instruction *Next = II->getNextNode();
    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
      return RemoveConditionFromAssume(Next);

    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    FunctionType *AssumeIntrinsicTy = II->getFunctionType();
    Value *AssumeIntrinsic = II->getCalledOperand();
    Value *A, *B;
    if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
      // The original operand bundles are kept on the first split assume only.
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
                         II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
      return eraseInstFromFunction(*II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(A), OpBundles, II->getName());
      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
                         Builder.CreateNot(B), II->getName());
      return eraseInstFromFunction(*II);
    }

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    Instruction *LHS;
    if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_NE, m_Instruction(LHS),
                                        m_Zero())) &&
        LHS->getOpcode() == Instruction::Load &&
        LHS->getType()->isPointerTy() &&
        isValidAssumeForContext(II, LHS, &DT)) {
      MDNode *MD = MDNode::get(II->getContext(), {});
      LHS->setMetadata(LLVMContext::MD_nonnull, MD);
      LHS->setMetadata(LLVMContext::MD_noundef, MD);
      return RemoveConditionFromAssume(II);

      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // Simplify or remove individual operand bundles on the assume.
    for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
      OperandBundleUse OBU = II->getOperandBundleAt(Idx);

      // Separate storage assumptions apply to the underlying allocations, not
      // any particular pointer within them. When evaluating the hints for AA
      // purposes we getUnderlyingObject them; by precomputing the answers here
      // we can avoid having to do so repeatedly there.
      if (OBU.getTagName() == "separate_storage") {
        assert(OBU.Inputs.size() == 2);
        auto MaybeSimplifyHint = [&](const Use &U) {
          Value *Hint = U.get();
          // Not having a limit is safe because InstCombine removes unreachable
          // code.
          Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
          if (Hint != UnderlyingObject)
            replaceUse(const_cast<Use &>(U), UnderlyingObject);
        };
        MaybeSimplifyHint(OBU.Inputs[0]);
        MaybeSimplifyHint(OBU.Inputs[1]);
      }

      // Try to remove redundant alignment assumptions.
      if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
        RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
            *cast<AssumeInst>(II), II->arg_size() + Idx);
        if (!RK || RK.AttrKind != Attribute::Alignment ||
            !isPowerOf2_64(RK.ArgValue) || !isa<ConstantInt>(RK.IRArgValue))
          continue;

        // Remove align 1 bundles; they don't add any useful information.
        if (RK.ArgValue == 1)
          return CallBase::removeOperandBundle(II, OBU.getTagID());

        // Don't try to remove align assumptions for pointers derived from
        // arguments. We might lose information if the function gets inline and
        // the align argument attribute disappears.
        Value *UO = getUnderlyingObject(RK.WasOn);
        if (!UO || isa<Argument>(UO))
          continue;

        // Compute known bits for the pointer, passing nullptr as context to
        // avoid computeKnownBits using the assumption we are about to remove
        // for reasoning.
        KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
        unsigned TZ = std::min(Known.countMinTrailingZeros(),
                               Value::MaxAlignmentExponent);
        // Only drop the bundle if the provable alignment already covers it.
        if ((1ULL << TZ) < RK.ArgValue)
          continue;
        return CallBase::removeOperandBundle(II, OBU.getTagID());
      }

      // Drop nonnull bundles on pointers already known non-null.
      if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
        RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
            *cast<AssumeInst>(II), II->arg_size() + Idx);
        if (!RK || RK.AttrKind != Attribute::NonNull ||
            !isKnownNonZero(RK.WasOn,
                            getSimplifyQuery().getWithInstruction(II)))
          continue;
        return CallBase::removeOperandBundle(II, OBU.getTagID());
      }
    }

    // Convert nonnull assume like:
    // %A = icmp ne i32* %PTR, null
    // call void @llvm.assume(i1 %A)
    // into
    // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
    if (EnableKnowledgeRetention &&
        match(IIOperand,
              m_SpecificICmp(ICmpInst::ICMP_NE, m_Value(A), m_Zero())) &&
        A->getType()->isPointerTy()) {
      if (auto *Replacement = buildAssumeFromKnowledge(
              {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {

        Replacement->insertBefore(Next->getIterator());
        AC.registerAssumption(Replacement);
        return RemoveConditionFromAssume(II);
      }
    }

    // Convert alignment assume like:
    // %B = ptrtoint i32* %A to i64
    // %C = and i64 %B, Constant
    // %D = icmp eq i64 %C, 0
    // call void @llvm.assume(i1 %D)
    // into
    // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
    uint64_t AlignMask = 1;
    if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
         match(IIOperand,
               m_SpecificICmp(ICmpInst::ICMP_EQ,
                              m_And(m_Value(A), m_ConstantInt(AlignMask)),
                              m_Zero())))) {
      if (isPowerOf2_64(AlignMask + 1)) {
        uint64_t Offset = 0;
        match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
        if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
          /// Note: this doesn't preserve the offset information but merges
          /// offset and alignment.
          /// TODO: we can generate a GEP instead of merging the alignment with
          /// the offset.
          RetainedKnowledge RK{Attribute::Alignment,
                               MinAlign(Offset, AlignMask + 1), A};
          if (auto *Replacement =
                  buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {

            Replacement->insertAfter(II->getIterator());
            AC.registerAssumption(Replacement);
          }
          return RemoveConditionFromAssume(II);
        }
      }
    }

    /// Canonicalize Knowledge in operand bundles.
    if (EnableKnowledgeRetention && II->hasOperandBundles()) {
      for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
        auto &BOI = II->bundle_op_info_begin()[Idx];
        RetainedKnowledge RK =
            llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
        if (BOI.End - BOI.Begin > 2)
          continue; // Prevent reducing knowledge in an align with offset since
                    // extracting a RetainedKnowledge from them looses offset
                    // information
        RetainedKnowledge CanonRK =
            llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
                                            &getAssumptionCache(),
                                            &getDominatorTree());
        if (CanonRK == RK)
          continue;
        if (!CanonRK) {
          // Knowledge simplified away entirely: drop the bundle's use.
          if (BOI.End - BOI.Begin > 0) {
            Worklist.pushValue(II->op_begin()[BOI.Begin]);
            Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
          }
          continue;
        }
        assert(RK.AttrKind == CanonRK.AttrKind);
        // Rewrite the bundle operands in place with the canonical knowledge.
        if (BOI.End - BOI.Begin > 0)
          II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
        if (BOI.End - BOI.Begin > 1)
          II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
              Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
        if (RK.WasOn)
          Worklist.pushValue(RK.WasOn);
        return II;
      }
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    KnownBits Known(1);
    computeKnownBits(IIOperand, Known, II);
    if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
      return eraseInstFromFunction(*II);

    // assume(false) is unreachable.
    if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
      CreateNonTerminatorUnreachable(II);
      return eraseInstFromFunction(*II);
    }

    // Update the cache of affected values for this assumption (we might be
    // here because we just simplified the condition).
    AC.updateAffectedValues(cast<AssumeInst>(II));
    break;
  }
  case Intrinsic::experimental_guard: {
    // Is this guard followed by another guard? We scan forward over a small
    // fixed window of instructions to handle common cases with conditions
    // computed between guards.
    Instruction *NextInst = II->getNextNode();
    for (unsigned i = 0; i < GuardWideningWindow; i++) {
      // Note: Using context-free form to avoid compile time blow up
      if (!isSafeToSpeculativelyExecute(NextInst))
        break;
      NextInst = NextInst->getNextNode();
    }
    Value *NextCond = nullptr;
    if (match(NextInst,
              m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
      Value *CurrCond = II->getArgOperand(0);

      // Remove a guard that it is immediately preceded by an identical guard.
      // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
      if (CurrCond != NextCond) {
        // Hoist the speculatable instructions between the two guards above
        // this one so the merged condition's operands dominate it.
        Instruction *MoveI = II->getNextNode();
        while (MoveI != NextInst) {
          auto *Temp = MoveI;
          MoveI = MoveI->getNextNode();
          Temp->moveBefore(II->getIterator());
        }
        replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
      }
      eraseInstFromFunction(*NextInst);
      return II;
    }
    break;
  }
  case Intrinsic::vector_insert: {
    // Canonicalize fixed-width vector_insert to a shufflevector so later
    // shuffle combines can see through it.
    Value *Vec = II->getArgOperand(0);
    Value *SubVec = II->getArgOperand(1);
    Value *Idx = II->getArgOperand(2);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts)
        return replaceInstUsesWith(CI, SubVec);

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(PoisonMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);

      // Blend mask: indices < DstNumElts select from Vec, indices >=
      // DstNumElts select from the widened SubVec (the second operand).
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
      return replaceInstUsesWith(CI, Shuffle);
    }
    break;
  }
3885 case Intrinsic::vector_extract: {
3886 Value *Vec = II->getArgOperand(i: 0);
3887 Value *Idx = II->getArgOperand(i: 1);
3888
3889 Type *ReturnType = II->getType();
3890 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3891 // ExtractIdx)
3892 unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
3893 Value *InsertTuple, *InsertIdx, *InsertValue;
3894 if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple),
3895 Op1: m_Value(V&: InsertValue),
3896 Op2: m_Value(V&: InsertIdx))) &&
3897 InsertValue->getType() == ReturnType) {
3898 unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue();
3899 // Case where we get the same index right after setting it.
3900 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3901 // InsertValue
3902 if (ExtractIdx == Index)
3903 return replaceInstUsesWith(I&: CI, V: InsertValue);
3904 // If we are getting a different index than what was set in the
3905 // insert.vector intrinsic. We can just set the input tuple to the one up
3906 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3907 // InsertIndex), ExtractIndex)
3908 // --> extract.vector(InsertTuple, ExtractIndex)
3909 else
3910 return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple);
3911 }
3912
3913 auto *DstTy = dyn_cast<VectorType>(Val: ReturnType);
3914 auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType());
3915
3916 if (DstTy && VecTy) {
3917 auto DstEltCnt = DstTy->getElementCount();
3918 auto VecEltCnt = VecTy->getElementCount();
3919 unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3920
3921 // Extracting the entirety of Vec is a nop.
3922 if (DstEltCnt == VecTy->getElementCount()) {
3923 replaceInstUsesWith(I&: CI, V: Vec);
3924 return eraseInstFromFunction(I&: CI);
3925 }
3926
3927 // Only canonicalize to shufflevector if the destination vector and
3928 // Vec are fixed vectors.
3929 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3930 break;
3931
3932 SmallVector<int, 8> Mask;
3933 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3934 Mask.push_back(Elt: IdxN + i);
3935
3936 Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask);
3937 return replaceInstUsesWith(I&: CI, V: Shuffle);
3938 }
3939 break;
3940 }
3941 case Intrinsic::experimental_vp_reverse: {
3942 Value *X;
3943 Value *Vec = II->getArgOperand(i: 0);
3944 Value *Mask = II->getArgOperand(i: 1);
3945 if (!match(V: Mask, P: m_AllOnes()))
3946 break;
3947 Value *EVL = II->getArgOperand(i: 2);
3948 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3949 // rev(unop rev(X)) --> unop X
3950 if (match(V: Vec,
3951 P: m_OneUse(SubPattern: m_UnOp(X: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
3952 Op0: m_Value(V&: X), Op1: m_AllOnes(), Op2: m_Specific(V: EVL)))))) {
3953 auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
3954 auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3955 Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(),
3956 InsertBefore: II->getIterator());
3957 return replaceInstUsesWith(I&: CI, V: NewUnOp);
3958 }
3959 break;
3960 }
3961 case Intrinsic::vector_reduce_or:
3962 case Intrinsic::vector_reduce_and: {
3963 // Canonicalize logical or/and reductions:
3964 // Or reduction for i1 is represented as:
3965 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3966 // %res = cmp ne iReduxWidth %val, 0
3967 // And reduction for i1 is represented as:
3968 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3969 // %res = cmp eq iReduxWidth %val, 11111
3970 Value *Arg = II->getArgOperand(i: 0);
3971 Value *Vect;
3972
3973 if (Value *NewOp =
3974 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3975 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
3976 return II;
3977 }
3978
3979 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3980 if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3981 if (FTy->getElementType() == Builder.getInt1Ty()) {
3982 Value *Res = Builder.CreateBitCast(
3983 V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3984 if (IID == Intrinsic::vector_reduce_and) {
3985 Res = Builder.CreateICmpEQ(
3986 LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
3987 } else {
3988 assert(IID == Intrinsic::vector_reduce_or &&
3989 "Expected or reduction.");
3990 Res = Builder.CreateIsNotNull(Arg: Res);
3991 }
3992 if (Arg != Vect)
3993 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3994 DestTy: II->getType());
3995 return replaceInstUsesWith(I&: CI, V: Res);
3996 }
3997 }
3998 [[fallthrough]];
3999 }
4000 case Intrinsic::vector_reduce_add: {
4001 if (IID == Intrinsic::vector_reduce_add) {
4002 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4003 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4004 // Convert vector_reduce_add(SExt(<n x i1>)) to
4005 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4006 // Convert vector_reduce_add(<n x i1>) to
4007 // Trunc(ctpop(bitcast <n x i1> to in)).
4008 Value *Arg = II->getArgOperand(i: 0);
4009 Value *Vect;
4010
4011 if (Value *NewOp =
4012 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4013 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4014 return II;
4015 }
4016
4017 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4018 if (Value *Splat = getSplatValue(V: Arg)) {
4019 ElementCount VecToReduceCount =
4020 cast<VectorType>(Val: Arg->getType())->getElementCount();
4021 if (VecToReduceCount.isFixed()) {
4022 unsigned VectorSize = VecToReduceCount.getFixedValue();
4023 return BinaryOperator::CreateMul(
4024 V1: Splat,
4025 V2: ConstantInt::get(Ty: Splat->getType(), V: VectorSize, /*IsSigned=*/false,
4026 /*ImplicitTrunc=*/true));
4027 }
4028 }
4029
4030 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4031 if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
4032 if (FTy->getElementType() == Builder.getInt1Ty()) {
4033 Value *V = Builder.CreateBitCast(
4034 V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
4035 Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V);
4036 Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
4037 if (Arg != Vect &&
4038 cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
4039 Res = Builder.CreateNeg(V: Res);
4040 return replaceInstUsesWith(I&: CI, V: Res);
4041 }
4042 }
4043 }
4044 [[fallthrough]];
4045 }
4046 case Intrinsic::vector_reduce_xor: {
4047 if (IID == Intrinsic::vector_reduce_xor) {
4048 // Exclusive disjunction reduction over the vector with
4049 // (potentially-extended) i1 element type is actually a
4050 // (potentially-extended) arithmetic `add` reduction over the original
4051 // non-extended value:
4052 // vector_reduce_xor(?ext(<n x i1>))
4053 // -->
4054 // ?ext(vector_reduce_add(<n x i1>))
4055 Value *Arg = II->getArgOperand(i: 0);
4056 Value *Vect;
4057
4058 if (Value *NewOp =
4059 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4060 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4061 return II;
4062 }
4063
4064 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4065 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4066 if (VTy->getElementType() == Builder.getInt1Ty()) {
4067 Value *Res = Builder.CreateAddReduce(Src: Vect);
4068 if (Arg != Vect)
4069 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4070 DestTy: II->getType());
4071 return replaceInstUsesWith(I&: CI, V: Res);
4072 }
4073 }
4074 }
4075 [[fallthrough]];
4076 }
4077 case Intrinsic::vector_reduce_mul: {
4078 if (IID == Intrinsic::vector_reduce_mul) {
4079 // Multiplicative reduction over the vector with (potentially-extended)
4080 // i1 element type is actually a (potentially zero-extended)
4081 // logical `and` reduction over the original non-extended value:
4082 // vector_reduce_mul(?ext(<n x i1>))
4083 // -->
4084 // zext(vector_reduce_and(<n x i1>))
4085 Value *Arg = II->getArgOperand(i: 0);
4086 Value *Vect;
4087
4088 if (Value *NewOp =
4089 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4090 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4091 return II;
4092 }
4093
4094 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4095 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4096 if (VTy->getElementType() == Builder.getInt1Ty()) {
4097 Value *Res = Builder.CreateAndReduce(Src: Vect);
4098 Res = Builder.CreateZExt(V: Res, DestTy: II->getType());
4099 return replaceInstUsesWith(I&: CI, V: Res);
4100 }
4101 }
4102 }
4103 [[fallthrough]];
4104 }
4105 case Intrinsic::vector_reduce_umin:
4106 case Intrinsic::vector_reduce_umax: {
4107 if (IID == Intrinsic::vector_reduce_umin ||
4108 IID == Intrinsic::vector_reduce_umax) {
4109 // UMin/UMax reduction over the vector with (potentially-extended)
4110 // i1 element type is actually a (potentially-extended)
4111 // logical `and`/`or` reduction over the original non-extended value:
4112 // vector_reduce_u{min,max}(?ext(<n x i1>))
4113 // -->
4114 // ?ext(vector_reduce_{and,or}(<n x i1>))
4115 Value *Arg = II->getArgOperand(i: 0);
4116 Value *Vect;
4117
4118 if (Value *NewOp =
4119 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4120 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4121 return II;
4122 }
4123
4124 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4125 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4126 if (VTy->getElementType() == Builder.getInt1Ty()) {
4127 Value *Res = IID == Intrinsic::vector_reduce_umin
4128 ? Builder.CreateAndReduce(Src: Vect)
4129 : Builder.CreateOrReduce(Src: Vect);
4130 if (Arg != Vect)
4131 Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
4132 DestTy: II->getType());
4133 return replaceInstUsesWith(I&: CI, V: Res);
4134 }
4135 }
4136 }
4137 [[fallthrough]];
4138 }
4139 case Intrinsic::vector_reduce_smin:
4140 case Intrinsic::vector_reduce_smax: {
4141 if (IID == Intrinsic::vector_reduce_smin ||
4142 IID == Intrinsic::vector_reduce_smax) {
4143 // SMin/SMax reduction over the vector with (potentially-extended)
4144 // i1 element type is actually a (potentially-extended)
4145 // logical `and`/`or` reduction over the original non-extended value:
4146 // vector_reduce_s{min,max}(<n x i1>)
4147 // -->
4148 // vector_reduce_{or,and}(<n x i1>)
4149 // and
4150 // vector_reduce_s{min,max}(sext(<n x i1>))
4151 // -->
4152 // sext(vector_reduce_{or,and}(<n x i1>))
4153 // and
4154 // vector_reduce_s{min,max}(zext(<n x i1>))
4155 // -->
4156 // zext(vector_reduce_{and,or}(<n x i1>))
4157 Value *Arg = II->getArgOperand(i: 0);
4158 Value *Vect;
4159
4160 if (Value *NewOp =
4161 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4162 replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
4163 return II;
4164 }
4165
4166 if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
4167 if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
4168 if (VTy->getElementType() == Builder.getInt1Ty()) {
4169 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4170 if (Arg != Vect)
4171 ExtOpc = cast<CastInst>(Val: Arg)->getOpcode();
4172 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4173 (ExtOpc == Instruction::CastOps::ZExt))
4174 ? Builder.CreateAndReduce(Src: Vect)
4175 : Builder.CreateOrReduce(Src: Vect);
4176 if (Arg != Vect)
4177 Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType());
4178 return replaceInstUsesWith(I&: CI, V: Res);
4179 }
4180 }
4181 }
4182 [[fallthrough]];
4183 }
4184 case Intrinsic::vector_reduce_fmax:
4185 case Intrinsic::vector_reduce_fmin:
4186 case Intrinsic::vector_reduce_fadd:
4187 case Intrinsic::vector_reduce_fmul: {
4188 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4189 IID != Intrinsic::vector_reduce_fmul) ||
4190 II->hasAllowReassoc();
4191 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4192 IID == Intrinsic::vector_reduce_fmul)
4193 ? 1
4194 : 0;
4195 Value *Arg = II->getArgOperand(i: ArgIdx);
4196 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4197 replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp);
4198 return nullptr;
4199 }
4200 break;
4201 }
4202 case Intrinsic::is_fpclass: {
4203 if (Instruction *I = foldIntrinsicIsFPClass(II&: *II))
4204 return I;
4205 break;
4206 }
4207 case Intrinsic::threadlocal_address: {
4208 Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT);
4209 MaybeAlign Align = II->getRetAlign();
4210 if (MinAlign > Align.valueOrOne()) {
4211 II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign));
4212 return II;
4213 }
4214 break;
4215 }
4216 case Intrinsic::frexp: {
4217 Value *X;
4218 // The first result is idempotent with the added complication of the struct
4219 // return, and the second result is zero because the value is already
4220 // normalized.
4221 if (match(V: II->getArgOperand(i: 0), P: m_ExtractValue<0>(V: m_Value(V&: X)))) {
4222 if (match(V: X, P: m_Intrinsic<Intrinsic::frexp>(Op0: m_Value()))) {
4223 X = Builder.CreateInsertValue(
4224 Agg: X, Val: Constant::getNullValue(Ty: II->getType()->getStructElementType(N: 1)),
4225 Idxs: 1);
4226 return replaceInstUsesWith(I&: *II, V: X);
4227 }
4228 }
4229 break;
4230 }
4231 case Intrinsic::get_active_lane_mask: {
4232 const APInt *Op0, *Op1;
4233 if (match(V: II->getOperand(i_nocapture: 0), P: m_StrictlyPositive(V&: Op0)) &&
4234 match(V: II->getOperand(i_nocapture: 1), P: m_APInt(Res&: Op1))) {
4235 Type *OpTy = II->getOperand(i_nocapture: 0)->getType();
4236 return replaceInstUsesWith(
4237 I&: *II, V: Builder.CreateIntrinsic(
4238 RetTy: II->getType(), ID: Intrinsic::get_active_lane_mask,
4239 Args: {Constant::getNullValue(Ty: OpTy),
4240 ConstantInt::get(Ty: OpTy, V: Op1->usub_sat(RHS: *Op0))}));
4241 }
4242 break;
4243 }
4244 case Intrinsic::experimental_get_vector_length: {
4245 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4246 unsigned BitWidth =
4247 std::max(a: II->getArgOperand(i: 0)->getType()->getScalarSizeInBits(),
4248 b: II->getType()->getScalarSizeInBits());
4249 ConstantRange Cnt =
4250 computeConstantRangeIncludingKnownBits(V: II->getArgOperand(i: 0), ForSigned: false,
4251 SQ: SQ.getWithInstruction(I: II))
4252 .zextOrTrunc(BitWidth);
4253 ConstantRange MaxLanes = cast<ConstantInt>(Val: II->getArgOperand(i: 1))
4254 ->getValue()
4255 .zextOrTrunc(width: Cnt.getBitWidth());
4256 if (cast<ConstantInt>(Val: II->getArgOperand(i: 2))->isOne())
4257 MaxLanes = MaxLanes.multiply(
4258 Other: getVScaleRange(F: II->getFunction(), BitWidth: Cnt.getBitWidth()));
4259
4260 if (Cnt.icmp(Pred: CmpInst::ICMP_ULE, Other: MaxLanes))
4261 return replaceInstUsesWith(
4262 I&: *II, V: Builder.CreateZExtOrTrunc(V: II->getArgOperand(i: 0), DestTy: II->getType()));
4263 return nullptr;
4264 }
4265 default: {
4266 // Handle target specific intrinsics
4267 std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II);
4268 if (V)
4269 return *V;
4270 break;
4271 }
4272 }
4273
4274 // Try to fold intrinsic into select/phi operands. This is legal if:
4275 // * The intrinsic is speculatable.
4276 // * The operand is one of the following:
4277 // - a phi.
4278 // - a select with a scalar condition.
4279 // - a select with a vector condition and II is not a cross lane operation.
4280 if (isSafeToSpeculativelyExecuteWithVariableReplaced(I: &CI)) {
4281 for (Value *Op : II->args()) {
4282 if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) {
4283 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4284 if (IsVectorCond &&
4285 (!isNotCrossLaneOperation(I: II) || !II->getType()->isVectorTy()))
4286 continue;
4287 // Don't replace a scalar select with a more expensive vector select if
4288 // we can't simplify both arms of the select.
4289 bool SimplifyBothArms =
4290 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4291 if (Instruction *R = FoldOpIntoSelect(
4292 Op&: *II, SI: Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4293 return R;
4294 }
4295 if (auto *Phi = dyn_cast<PHINode>(Val: Op))
4296 if (Instruction *R = foldOpIntoPhi(I&: *II, PN: Phi))
4297 return R;
4298 }
4299 }
4300
4301 if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
4302 return Shuf;
4303
4304 if (Value *Reverse = foldReversedIntrinsicOperands(II))
4305 return replaceInstUsesWith(I&: *II, V: Reverse);
4306
4307 if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(IC&: *this, II))
4308 return replaceInstUsesWith(I&: *II, V: Res);
4309
4310 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4311 // context, so it is handled in visitCallBase and we should trigger it.
4312 return visitCallBase(Call&: *II);
4313}
4314
4315// Fence instruction simplification
4316Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4317 auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNode());
4318 // This check is solely here to handle arbitrary target-dependent syncscopes.
4319 // TODO: Can remove if does not matter in practice.
4320 if (NFI && FI.isIdenticalTo(I: NFI))
4321 return eraseInstFromFunction(I&: FI);
4322
4323 // Returns true if FI1 is identical or stronger fence than FI2.
4324 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4325 auto FI1SyncScope = FI1->getSyncScopeID();
4326 // Consider same scope, where scope is global or single-thread.
4327 if (FI1SyncScope != FI2->getSyncScopeID() ||
4328 (FI1SyncScope != SyncScope::System &&
4329 FI1SyncScope != SyncScope::SingleThread))
4330 return false;
4331
4332 return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering());
4333 };
4334 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4335 return eraseInstFromFunction(I&: FI);
4336
4337 if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNode()))
4338 if (isIdenticalOrStrongerFence(PFI, &FI))
4339 return eraseInstFromFunction(I&: FI);
4340 return nullptr;
4341}
4342
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  // All shared call-site folds live in visitCallBase; nothing invoke-specific.
  return visitCallBase(Call&: II);
}
4347
// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  // All shared call-site folds live in visitCallBase; nothing callbr-specific.
  return visitCallBase(Call&: CBI);
}
4352
// Rewrite a call carrying a "modular-format" attribute to target a modular
// implementation when only a subset of the listed format "aspects" is needed
// by this particular call's arguments. Returns the replacement call, or
// nullptr when no rewrite applies.
static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
  if (!CI->hasFnAttr(Kind: "modular-format"))
    return nullptr;

  // The attribute value is a comma-separated list; fields 2..4 are consumed
  // below and the remainder names the format aspects.
  SmallVector<StringRef> Args(
      llvm::split(Str: CI->getFnAttr(Kind: "modular-format").getValueAsString(), Separator: ','));
  // TODO: Make use of the first two arguments
  unsigned FirstArgIdx;
  [[maybe_unused]] bool Error;
  Error = Args[2].getAsInteger(Radix: 10, Result&: FirstArgIdx);
  assert(!Error && "invalid first arg index");
  // The decrement converts the attribute's index to a 0-based operand index.
  --FirstArgIdx;
  StringRef FnName = Args[3];
  StringRef ImplName = Args[4];
  ArrayRef<StringRef> AllAspects = ArrayRef<StringRef>(Args).drop_front(N: 5);

  if (AllAspects.empty())
    return nullptr;

  // Determine which of the declared aspects this call actually needs.
  SmallVector<StringRef> NeededAspects;
  for (StringRef Aspect : AllAspects) {
    if (Aspect == "float") {
      // "float" is needed only if some argument from FirstArgIdx onward has a
      // floating-point type.
      if (llvm::any_of(
              Range: llvm::make_range(x: std::next(x: CI->arg_begin(), n: FirstArgIdx),
                               y: CI->arg_end()),
              P: [](Value *V) { return V->getType()->isFloatingPointTy(); }))
        NeededAspects.push_back(Elt: "float");
    } else {
      // Unknown aspects are always considered to be needed.
      NeededAspects.push_back(Elt: Aspect);
    }
  }

  // If every aspect is needed, the modular form offers no benefit.
  if (NeededAspects.size() == AllAspects.size())
    return nullptr;

  // Clone the call to target the modular function, stripping the attribute
  // from both the declaration and the call so the rewrite is not re-applied.
  Module *M = CI->getModule();
  LLVMContext &Ctx = M->getContext();
  Function *Callee = CI->getCalledFunction();
  FunctionCallee ModularFn = M->getOrInsertFunction(
      Name: FnName, T: Callee->getFunctionType(),
      AttributeList: Callee->getAttributes().removeFnAttribute(C&: Ctx, Kind: "modular-format"));
  CallInst *New = cast<CallInst>(Val: CI->clone());
  New->setCalledFunction(ModularFn);
  New->removeFnAttr(Kind: "modular-format");
  B.Insert(I: New);

  // Emit a llvm.reloc.none naming "<impl>_<aspect>" for each needed aspect.
  const auto ReferenceAspect = [&](StringRef Aspect) {
    SmallString<20> Name = ImplName;
    Name += '_';
    Name += Aspect;
    Function *RelocNoneFn =
        Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::reloc_none);
    B.CreateCall(Callee: RelocNoneFn,
                 Args: {MetadataAsValue::get(Context&: Ctx, MD: MDString::get(Context&: Ctx, Str: Name))});
  };

  // Sort so the emitted references have a deterministic order.
  llvm::sort(C&: NeededAspects);
  for (StringRef Request : NeededAspects)
    ReferenceAspect(Request);

  return New;
}
4416
4417Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4418 if (!CI->getCalledFunction()) return nullptr;
4419
4420 // Skip optimizing notail and musttail calls so
4421 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4422 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4423 if (CI->isMustTailCall() || CI->isNoTailCall())
4424 return nullptr;
4425
4426 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4427 replaceInstUsesWith(I&: *From, V: With);
4428 };
4429 auto InstCombineErase = [this](Instruction *I) {
4430 eraseInstFromFunction(I&: *I);
4431 };
4432 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4433 InstCombineRAUW, InstCombineErase);
4434 if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) {
4435 ++NumSimplified;
4436 return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4437 }
4438 if (Value *With = optimizeModularFormat(CI, B&: Builder)) {
4439 ++NumSimplified;
4440 return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
4441 }
4442
4443 return nullptr;
4444}
4445
4446static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
4447 // Strip off at most one level of pointer casts, looking for an alloca. This
4448 // is good enough in practice and simpler than handling any number of casts.
4449 Value *Underlying = TrampMem->stripPointerCasts();
4450 if (Underlying != TrampMem &&
4451 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4452 return nullptr;
4453 if (!isa<AllocaInst>(Val: Underlying))
4454 return nullptr;
4455
4456 IntrinsicInst *InitTrampoline = nullptr;
4457 for (User *U : TrampMem->users()) {
4458 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
4459 if (!II)
4460 return nullptr;
4461 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4462 if (InitTrampoline)
4463 // More than one init_trampoline writes to this value. Give up.
4464 return nullptr;
4465 InitTrampoline = II;
4466 continue;
4467 }
4468 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4469 // Allow any number of calls to adjust.trampoline.
4470 continue;
4471 return nullptr;
4472 }
4473
4474 // No call to init.trampoline found.
4475 if (!InitTrampoline)
4476 return nullptr;
4477
4478 // Check that the alloca is being used in the expected way.
4479 if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem)
4480 return nullptr;
4481
4482 return InitTrampoline;
4483}
4484
// Scan backwards from the adjust.trampoline call within its basic block for
// a llvm.init.trampoline on the same memory, stopping at any instruction
// that may write memory. Returns the init.trampoline or nullptr.
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    // Pre-decrement before dereferencing: the scan starts at the instruction
    // immediately preceding AdjustTramp and walks toward the block entry.
    Instruction *Inst = &*--I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(i_nocapture: 0) == TrampMem)
        return II;
    // Any intervening write could clobber the trampoline memory, so stop the
    // search conservatively.
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}
4502
4503// Given a call to llvm.adjust.trampoline, find and return the corresponding
4504// call to llvm.init.trampoline if the call to the trampoline can be optimized
4505// to a direct call to a function. Otherwise return NULL.
4506static IntrinsicInst *findInitTrampoline(Value *Callee) {
4507 Callee = Callee->stripPointerCasts();
4508 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee);
4509 if (!AdjustTramp ||
4510 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4511 return nullptr;
4512
4513 Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0);
4514
4515 if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4516 return IT;
4517 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4518 return IT;
4519 return nullptr;
4520}
4521
/// Fold a call whose callee is a no-op inttoptr of a ptrauth.sign or
/// ptrauth.resign result into a call of the underlying pointer, rewriting the
/// call's "ptrauth" operand bundle to match. Returns the rewritten call, or
/// nullptr if the intrinsic operands and the bundle are not compatible.
Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
  // The callee must be an int-to-ptr cast that doesn't change the value.
  const Value *Callee = Call.getCalledOperand();
  const auto *IPC = dyn_cast<IntToPtrInst>(Val: Callee);
  if (!IPC || !IPC->isNoopCast(DL))
    return nullptr;

  const auto *II = dyn_cast<IntrinsicInst>(Val: IPC->getOperand(i_nocapture: 0));
  if (!II)
    return nullptr;

  Intrinsic::ID IIID = II->getIntrinsicID();
  if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
    return nullptr;

  // Isolate the ptrauth bundle from the others.
  std::optional<OperandBundleUse> PtrAuthBundleOrNone;
  SmallVector<OperandBundleDef, 2> NewBundles;
  for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
    OperandBundleUse Bundle = Call.getOperandBundleAt(Index: BI);
    if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
      PtrAuthBundleOrNone = Bundle;
    else
      NewBundles.emplace_back(Args&: Bundle);
  }

  // Without a ptrauth bundle on the call there is nothing to match against.
  if (!PtrAuthBundleOrNone)
    return nullptr;

  Value *NewCallee = nullptr;
  switch (IIID) {
  // call(ptrauth.resign(p)), ["ptrauth"()] ->  call p, ["ptrauth"()]
  // assuming the call bundle and the sign operands match.
  case Intrinsic::ptrauth_resign: {
    // Resign result key should match bundle.
    if (II->getOperand(i_nocapture: 3) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;
    // Resign result discriminator should match bundle.
    if (II->getOperand(i_nocapture: 4) != PtrAuthBundleOrNone->Inputs[1])
      return nullptr;

    // Resign input (auth) key should also match: we can't change the key on
    // the new call we're generating, because we don't know what keys are valid.
    if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;

    // The new call authenticates with the resign's input key/discriminator.
    Value *NewBundleOps[] = {II->getOperand(i_nocapture: 1), II->getOperand(i_nocapture: 2)};
    NewBundles.emplace_back(Args: "ptrauth", Args&: NewBundleOps);
    NewCallee = II->getOperand(i_nocapture: 0);
    break;
  }

  // call(ptrauth.sign(p)), ["ptrauth"()] ->  call p
  // assuming the call bundle and the sign operands match.
  // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
  case Intrinsic::ptrauth_sign: {
    // Sign key should match bundle.
    if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;
    // Sign discriminator should match bundle.
    if (II->getOperand(i_nocapture: 2) != PtrAuthBundleOrNone->Inputs[1])
      return nullptr;
    // No replacement bundle: the raw pointer is called directly.
    NewCallee = II->getOperand(i_nocapture: 0);
    break;
  }
  default:
    llvm_unreachable("unexpected intrinsic ID");
  }

  if (!NewCallee)
    return nullptr;

  // Rebuild the call with the new callee and the filtered/adjusted bundles.
  NewCallee = Builder.CreateBitOrPointerCast(V: NewCallee, DestTy: Callee->getType());
  CallBase *NewCall = CallBase::Create(CB: &Call, Bundles: NewBundles);
  NewCall->setCalledOperand(NewCallee);
  return NewCall;
}
4598
4599Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4600 auto *CPA = dyn_cast<ConstantPtrAuth>(Val: Call.getCalledOperand());
4601 if (!CPA)
4602 return nullptr;
4603
4604 auto *CalleeF = dyn_cast<Function>(Val: CPA->getPointer());
4605 // If the ptrauth constant isn't based on a function pointer, bail out.
4606 if (!CalleeF)
4607 return nullptr;
4608
4609 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4610 auto PAB = Call.getOperandBundle(ID: LLVMContext::OB_ptrauth);
4611 if (!PAB)
4612 return nullptr;
4613
4614 auto *Key = cast<ConstantInt>(Val: PAB->Inputs[0]);
4615 Value *Discriminator = PAB->Inputs[1];
4616
4617 // If the bundle doesn't match, this is probably going to fail to auth.
4618 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4619 return nullptr;
4620
4621 // If the bundle matches the constant, proceed in making this a direct call.
4622 auto *NewCall = CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_ptrauth);
4623 NewCall->setCalledOperand(CalleeF);
4624 return NewCall;
4625}
4626
4627bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4628 const TargetLibraryInfo *TLI) {
4629 // Note: We only handle cases which can't be driven from generic attributes
4630 // here. So, for example, nonnull and noalias (which are common properties
4631 // of some allocation functions) are expected to be handled via annotation
4632 // of the respective allocator declaration with generic attributes.
4633 bool Changed = false;
4634
4635 if (!Call.getType()->isPointerTy())
4636 return Changed;
4637
4638 std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
4639 if (Size && *Size != 0) {
4640 // TODO: We really should just emit deref_or_null here and then
4641 // let the generic inference code combine that with nonnull.
4642 if (Call.hasRetAttr(Kind: Attribute::NonNull)) {
4643 Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable);
4644 Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
4645 Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
4646 } else {
4647 Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull);
4648 Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
4649 Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
4650 }
4651 }
4652
4653 // Add alignment attribute if alignment is a power of two constant.
4654 Value *Alignment = getAllocAlignment(V: &Call, TLI);
4655 if (!Alignment)
4656 return Changed;
4657
4658 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
4659 if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
4660 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4661 if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
4662 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4663 Align NewAlign = Align(AlignmentVal);
4664 if (NewAlign > ExistingAlign) {
4665 Call.addRetAttr(
4666 Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
4667 Changed = true;
4668 }
4669 }
4670 }
4671 return Changed;
4672}
4673
4674/// Improvements for call, callbr and invoke instructions.
/// Improvements for call, callbr and invoke instructions.
///
/// Returns a replacement instruction, &Call when the instruction was changed
/// in place, or nullptr when no further combining should happen here.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
  bool Changed = annotateAnyAllocSite(Call, TLI: &TLI);

  // Mark any parameters that are known to be non-null with the nonnull
  // attribute. This is helpful for inlining calls to functions with null
  // checks on their arguments.
  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : Call.args()) {
    if (V->getType()->isPointerTy()) {
      // Simplify the nonnull operand if the parameter is known to be nonnull.
      // Otherwise, try to infer nonnull for it.
      bool HasDereferenceable = Call.getParamDereferenceableBytes(i: ArgNo) > 0;
      if (Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) ||
          (HasDereferenceable &&
           !NullPointerIsDefined(F: Call.getFunction(),
                                 AS: V->getType()->getPointerAddressSpace()))) {
        if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
          replaceOperand(I&: Call, OpNum: ArgNo, V: Res);
          Changed = true;
        }
      } else if (isKnownNonZero(V,
                                Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
        // Remember the argument index; the nonnull attributes are added in
        // one batch below to avoid rebuilding the attribute list per arg.
        ArgNos.push_back(Elt: ArgNo);
      }
    }
    ArgNo++;
  }

  assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");

  if (!ArgNos.empty()) {
    AttributeList AS = Call.getAttributes();
    LLVMContext &Ctx = Call.getContext();
    AS = AS.addParamAttribute(C&: Ctx, ArgNos,
                              A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull));
    Call.setAttributes(AS);
    Changed = true;
  }

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/callbr/invoke.
  Value *Callee = Call.getCalledOperand();
  Function *CalleeF = dyn_cast<Function>(Val: Callee);
  if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
      transformConstExprCastCall(Call))
    return nullptr;

  if (CalleeF) {
    // Remove the convergent attr on calls when the callee is not convergent.
    if (Call.isConvergent() && !CalleeF->isConvergent() &&
        !CalleeF->isIntrinsic()) {
      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
                        << "\n");
      Call.setNotConvergent();
      return &Call;
    }

    // If the call and callee calling conventions don't match, and neither one
    // of the calling conventions is compatible with C calling convention
    // this call must be unreachable, as the call is undefined.
    if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
         !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) &&
         !(Call.getCallingConv() == llvm::CallingConv::C &&
           TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) &&
        // Only do this for calls to a function with a body. A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = &Call;
      CreateNonTerminatorUnreachable(InsertAt: OldCall);
      // If OldCall does not return void then replaceInstUsesWith poison.
      // This allows ValueHandlers and custom metadata to adjust itself.
      if (!OldCall->getType()->isVoidTy())
        replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType()));
      if (isa<CallInst>(Val: OldCall))
        return eraseInstFromFunction(I&: *OldCall);

      // We cannot remove an invoke or a callbr, because it would change the
      // CFG, just change the callee to a null pointer.
      cast<CallBase>(Val: OldCall)->setCalledFunction(
          FTy: CalleeF->getFunctionType(),
          Fn: Constant::getNullValue(Ty: CalleeF->getType()));
      return nullptr;
    }
  }

  // Calling a null function pointer is undefined if a null address isn't
  // dereferenceable.
  if ((isa<ConstantPointerNull>(Val: Callee) &&
       !NullPointerIsDefined(F: Call.getFunction())) ||
      isa<UndefValue>(Val: Callee)) {
    // If Call does not return void then replaceInstUsesWith poison.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!Call.getType()->isVoidTy())
      replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType()));

    if (Call.isTerminator()) {
      // Can't remove an invoke or callbr because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it.
    CreateNonTerminatorUnreachable(InsertAt: &Call);
    return eraseInstFromFunction(I&: Call);
  }

  if (IntrinsicInst *II = findInitTrampoline(Callee))
    return transformCallThroughTrampoline(Call, Tramp&: *II);

  // Combine calls involving pointer authentication intrinsics.
  if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
    return NewCall;

  // Combine calls to ptrauth constants.
  if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
    return NewCall;

  if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) {
    InlineAsm *IA = cast<InlineAsm>(Val: Callee);
    if (!IA->canThrow()) {
      // Normal inline asm calls cannot throw - mark them
      // 'nounwind'.
      Call.setDoesNotThrow();
      Changed = true;
    }
  }

  // Try to optimize the call if possible, we require DataLayout for most of
  // this. None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) {
    Instruction *I = tryOptimizeCall(CI);
    // If we changed something return the result, etc. Otherwise let
    // the fallthrough check.
    if (I) return eraseInstFromFunction(I&: *I);
  }

  // If the call has a 'returned' argument, forward uses of the call directly
  // to that argument (skipped for musttail calls, whose result must come
  // from the call itself).
  if (!Call.use_empty() && !Call.isMustTailCall())
    if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
      Type *CallTy = Call.getType();
      Type *RetArgTy = ReturnedArg->getType();
      if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy))
        return replaceInstUsesWith(
            I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy));
    }

  // Drop unnecessary callee_type metadata from calls that were converted
  // into direct calls.
  if (Call.getMetadata(KindID: LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
    Call.setMetadata(KindID: LLVMContext::MD_callee_type, Node: nullptr);
    Changed = true;
  }

  // Drop unnecessary kcfi operand bundles from calls that were converted
  // into direct calls.
  auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi);
  if (Bundle && !Call.isIndirectCall()) {
    // Debug-only diagnostic: warn when the direct callee's kcfi type id does
    // not match the id expected by the call's kcfi bundle.
    DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
      if (CalleeF) {
        ConstantInt *FunctionType = nullptr;
        ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);

        if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
          FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));

        if (FunctionType &&
            FunctionType->getZExtValue() != ExpectedType->getZExtValue())
          dbgs() << Call.getModule()->getName()
                 << ": warning: kcfi: " << Call.getCaller()->getName()
                 << ": call to " << CalleeF->getName()
                 << " using a mismatching function pointer type\n";
      }
    });

    return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi);
  }

  if (isRemovableAlloc(V: &Call, TLI: &TLI))
    return visitAllocSite(FI&: Call);

  // Handle intrinsics which can be used in both call and invoke context.
  switch (Call.getIntrinsicID()) {
  case Intrinsic::experimental_gc_statepoint: {
    GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call);
    SmallPtrSet<Value *, 32> LiveGcValues;
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);

      // Remove the relocation if unused.
      if (GCR.use_empty()) {
        eraseInstFromFunction(I&: GCR);
        continue;
      }

      Value *DerivedPtr = GCR.getDerivedPtr();
      Value *BasePtr = GCR.getBasePtr();

      // Undef is undef, even after relocation.
      if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) {
        replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType()));
        eraseInstFromFunction(I&: GCR);
        continue;
      }

      if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) {
        // The relocation of null will be null for most any collector.
        // TODO: provide a hook for this in GCStrategy. There might be some
        // weird collector this property does not hold for.
        if (isa<ConstantPointerNull>(Val: DerivedPtr)) {
          // Use null-pointer of gc_relocate's type to replace it.
          replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT));
          eraseInstFromFunction(I&: GCR);
          continue;
        }

        // isKnownNonNull -> nonnull attribute
        if (!GCR.hasRetAttr(Kind: Attribute::NonNull) &&
            isKnownNonZero(V: DerivedPtr,
                           Q: getSimplifyQuery().getWithInstruction(I: &Call))) {
          GCR.addRetAttr(Kind: Attribute::NonNull);
          // We discovered new fact, re-check users.
          Worklist.pushUsersToWorkList(I&: GCR);
        }
      }

      // If we have two copies of the same pointer in the statepoint argument
      // list, canonicalize to one. This may let us common gc.relocates.
      if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
          GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
        auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType();
        GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex()));
      }

      // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
      // Canonicalize on the type from the uses to the defs

      // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
      LiveGcValues.insert(Ptr: BasePtr);
      LiveGcValues.insert(Ptr: DerivedPtr);
    }
    std::optional<OperandBundleUse> Bundle =
        GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live);
    unsigned NumOfGCLives = LiveGcValues.size();
    if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
      break;
    // We can reduce the size of gc live bundle.
    DenseMap<Value *, unsigned> Val2Idx;
    std::vector<Value *> NewLiveGc;
    for (Value *V : Bundle->Inputs) {
      auto [It, Inserted] = Val2Idx.try_emplace(Key: V);
      if (!Inserted)
        continue;
      if (LiveGcValues.count(Ptr: V)) {
        It->second = NewLiveGc.size();
        NewLiveGc.push_back(x: V);
      } else
        // Dead live-bundle entry: give it the out-of-range sentinel index
        // (NumOfGCLives) so the asserts below can catch stale references.
        It->second = NumOfGCLives;
    }
    // Update all gc.relocates
    for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
      GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
      Value *BasePtr = GCR.getBasePtr();
      assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
             "Missed live gc for base pointer");
      auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType();
      GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr]));
      Value *DerivedPtr = GCR.getDerivedPtr();
      assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
             "Missed live gc for derived pointer");
      auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType();
      GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr]));
    }
    // Create new statepoint instruction.
    OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
    return CallBase::Create(CB: &Call, Bundle: NewBundle);
  }
  default: { break; }
  }

  return Changed ? &Call : nullptr;
}
4959
4960/// If the callee is a constexpr cast of a function, attempt to move the cast to
4961/// the arguments of the call/invoke.
4962/// CallBrInst is not supported.
/// If the callee is a constexpr cast of a function, attempt to move the cast
/// to the arguments of the call/invoke.
/// CallBrInst is not supported.
///
/// Returns true if the call was rewritten (the original instruction is erased
/// and replaced with a new call whose arguments are cast to the callee's real
/// parameter types), false if the transform is not safe or not applicable.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  auto *Callee =
      dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
  if (!Callee)
    return false;

  assert(!isa<CallBrInst>(Call) &&
         "CallBr's don't have a single point after a def to insert at");

  // Don't perform the transform for declarations, which may not be fully
  // accurate. For example, void @foo() is commonly used as a placeholder for
  // unknown prototypes.
  if (Callee->isDeclaration())
    return false;

  // If this is a call to a thunk function, don't remove the cast. Thunks are
  // used to transparently forward all incoming parameters and outgoing return
  // values, so it's important to leave the cast in place.
  if (Callee->hasFnAttribute(Kind: "thunk"))
    return false;

  // If this is a call to a naked function, the assembly might be
  // using an argument, or otherwise rely on the frame layout,
  // the function prototype will mismatch.
  if (Callee->hasFnAttribute(Kind: Attribute::Naked))
    return false;

  // If this is a musttail call, the callee's prototype must match the caller's
  // prototype with the exception of pointee types. The code below doesn't
  // implement that, so we can't do this transform.
  // TODO: Do the transform if it only requires adding pointer casts.
  if (Call.isMustTailCall())
    return false;

  Instruction *Caller = &Call;
  const AttributeList &CallerPAL = Call.getAttributes();

  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
      // An unused return value needs no cast, so the mismatch is harmless.
      if (!Caller->use_empty())
        return false; // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
      if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(
              Ty: NewRetTy, AS: CallerPAL.getRetAttrs())))
        return false; // Attribute not compatible with transformed value.
    }

    // If the callbase is an invoke instruction, and the return value is
    // used by a PHI node in a successor, we cannot change the return type of
    // the call because there is no place to put the cast instruction (without
    // breaking the critical edge). Bail out in this case.
    if (!Caller->use_empty()) {
      BasicBlock *PhisNotSupportedBlock = nullptr;
      if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
        PhisNotSupportedBlock = II->getNormalDest();
      if (PhisNotSupportedBlock)
        for (User *U : Caller->users())
          if (PHINode *PN = dyn_cast<PHINode>(Val: U))
            if (PN->getParent() == PhisNotSupportedBlock)
              return false;
    }
  }

  unsigned NumActualArgs = Call.arg_size();
  unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);

  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //  call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated))
    return false;

  // First pass over the common arguments: verify every one of them can be
  // converted losslessly and without dropping unsafe attributes.
  auto AI = Call.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
      return false; // Cannot transform this parameter value.

    // Check if there are any incompatible attributes we cannot drop safely.
    if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
            .overlaps(AM: AttributeFuncs::typeIncompatible(
                Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i),
                ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
      return false; // Attribute not compatible with transformed value.

    if (Call.isInAllocaArgument(ArgNo: i) ||
        CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated))
      return false; // Cannot transform to and from inalloca/preallocated.

    if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError))
      return false;

    if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) !=
        Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal))
      return false; // Cannot transform to or from byval.
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    unsigned SRetIdx;
    if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) &&
        SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
      return false;
  }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  SmallVector<Value *, 8> Args;
  SmallVector<AttributeSet, 8> ArgAttrs;
  Args.reserve(N: NumActualArgs);
  ArgAttrs.reserve(N: NumActualArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.remove(
      AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy, AS: CallerPAL.getRetAttrs()));

  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    Value *NewArg = *AI;
    if ((*AI)->getType() != ParamTy)
      NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
    Args.push_back(Elt: NewArg);

    // Add any parameter attributes except the ones incompatible with the new
    // type. Note that we made sure all incompatible ones are safe to drop.
    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
        Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
    ArgAttrs.push_back(
        Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
    Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
    ArgAttrs.push_back(Elt: AttributeSet());
  }

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType(Ty: (*AI)->getType());
        Value *NewArg = *AI;
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
              CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false);
          NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
        }
        Args.push_back(Elt: NewArg);

        // Add any parameter attributes.
        ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttrs();

  if (NewRetTy->isVoidTy())
    Caller->setName(""); // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes");
  AttributeList NewCallerPAL = AttributeList::get(
      C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);

  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(Defs&: OpBundles);

  // Build the replacement call/invoke with the fixed-up arguments, preserving
  // calling convention, tail-call kind, attributes and prof metadata.
  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
    NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
                                   UnwindDest: II->getUnwindDest(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(Val: NewCall)->setTailCallKind(
        cast<CallInst>(Val: Caller)->getTailCallKind());
  }
  NewCall->takeName(V: Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve prof metadata if any.
  NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    assert(!NV->getType()->isVoidTy());
    NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
    NC->setDebugLoc(Caller->getDebugLoc());

    auto OptInsertPt = NewCall->getInsertionPointAfterDef();
    assert(OptInsertPt && "No place to insert cast");
    InsertNewInstBefore(New: NC, Old: *OptInsertPt);
    Worklist.pushUsersToWorkList(I&: *Caller);
  }

  if (!Caller->use_empty())
    replaceInstUsesWith(I&: *Caller, V: NV);
  else if (Caller->hasValueHandle()) {
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(V: Caller);
  }

  eraseInstFromFunction(I&: *Caller);
  return true;
}
5213
5214/// Turn a call to a function created by init_trampoline / adjust_trampoline
5215/// intrinsic pair into a direct call to the underlying function.
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
///
/// If the underlying function has a 'nest' parameter, the trampoline's chain
/// value (Tramp's third operand) is spliced into the argument list at that
/// position; otherwise the callee is simply replaced. Returns the new or
/// updated instruction, or nullptr when the transform cannot be applied.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest))
    return nullptr;

  Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
      if (AS.hasAttribute(Kind: Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value*> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(n: Call.arg_size() + 1);
      NewArgAttrs.reserve(n: Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        // The do/while shape (rather than a plain loop) handles the case
        // where the nest parameter comes after all existing arguments: the
        // chain is still appended before the loop exits.
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(i: 2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest");
            NewArgs.push_back(x: NestVal);
            NewArgAttrs.push_back(x: NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(x: *I);
          NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(n: FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(x: NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(x: *I);

          ++ArgNo;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy =
          FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
      AttributeList NewPAL =
          AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
                             RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(Defs&: OpBundles);

      // Mirror the original instruction kind (invoke/callbr/call), carrying
      // over calling convention, attributes and tail-call kind.
      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
        NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
                                       IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
        cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
        NewCaller =
            CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
                               IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
        cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
        cast<CallInst>(Val: NewCaller)->setTailCallKind(
            cast<CallInst>(Val&: Call).getTailCallKind());
        cast<CallInst>(Val: NewCaller)->setCallingConv(
            cast<CallInst>(Val&: Call).getCallingConv());
        cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Call.setCalledFunction(FTy, Fn: NestF);
  return &Call;
}
5356