| 1 | //===- InstCombineCalls.cpp -----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the visitCall, visitInvoke, and visitCallBr functions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "InstCombineInternal.h" |
| 14 | #include "llvm/ADT/APFloat.h" |
| 15 | #include "llvm/ADT/APInt.h" |
| 16 | #include "llvm/ADT/APSInt.h" |
| 17 | #include "llvm/ADT/ArrayRef.h" |
| 18 | #include "llvm/ADT/STLFunctionalExtras.h" |
| 19 | #include "llvm/ADT/SmallBitVector.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/ADT/StringExtras.h" |
| 23 | #include "llvm/Analysis/AliasAnalysis.h" |
| 24 | #include "llvm/Analysis/AssumeBundleQueries.h" |
| 25 | #include "llvm/Analysis/AssumptionCache.h" |
| 26 | #include "llvm/Analysis/InstructionSimplify.h" |
| 27 | #include "llvm/Analysis/Loads.h" |
| 28 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 29 | #include "llvm/Analysis/ValueTracking.h" |
| 30 | #include "llvm/Analysis/VectorUtils.h" |
| 31 | #include "llvm/IR/AttributeMask.h" |
| 32 | #include "llvm/IR/Attributes.h" |
| 33 | #include "llvm/IR/BasicBlock.h" |
| 34 | #include "llvm/IR/Constant.h" |
| 35 | #include "llvm/IR/Constants.h" |
| 36 | #include "llvm/IR/DataLayout.h" |
| 37 | #include "llvm/IR/DebugInfo.h" |
| 38 | #include "llvm/IR/DerivedTypes.h" |
| 39 | #include "llvm/IR/Function.h" |
| 40 | #include "llvm/IR/GlobalVariable.h" |
| 41 | #include "llvm/IR/InlineAsm.h" |
| 42 | #include "llvm/IR/InstrTypes.h" |
| 43 | #include "llvm/IR/Instruction.h" |
| 44 | #include "llvm/IR/Instructions.h" |
| 45 | #include "llvm/IR/IntrinsicInst.h" |
| 46 | #include "llvm/IR/Intrinsics.h" |
| 47 | #include "llvm/IR/IntrinsicsAArch64.h" |
| 48 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 49 | #include "llvm/IR/IntrinsicsARM.h" |
| 50 | #include "llvm/IR/IntrinsicsHexagon.h" |
| 51 | #include "llvm/IR/LLVMContext.h" |
| 52 | #include "llvm/IR/Metadata.h" |
| 53 | #include "llvm/IR/PatternMatch.h" |
| 54 | #include "llvm/IR/ProfDataUtils.h" |
| 55 | #include "llvm/IR/Statepoint.h" |
| 56 | #include "llvm/IR/Type.h" |
| 57 | #include "llvm/IR/User.h" |
| 58 | #include "llvm/IR/Value.h" |
| 59 | #include "llvm/IR/ValueHandle.h" |
| 60 | #include "llvm/Support/AtomicOrdering.h" |
| 61 | #include "llvm/Support/Casting.h" |
| 62 | #include "llvm/Support/CommandLine.h" |
| 63 | #include "llvm/Support/Compiler.h" |
| 64 | #include "llvm/Support/Debug.h" |
| 65 | #include "llvm/Support/ErrorHandling.h" |
| 66 | #include "llvm/Support/KnownBits.h" |
| 67 | #include "llvm/Support/KnownFPClass.h" |
| 68 | #include "llvm/Support/MathExtras.h" |
| 69 | #include "llvm/Support/TypeSize.h" |
| 70 | #include "llvm/Support/raw_ostream.h" |
| 71 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
| 72 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
| 73 | #include "llvm/Transforms/Utils/Local.h" |
| 74 | #include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
| 75 | #include <algorithm> |
| 76 | #include <cassert> |
| 77 | #include <cstdint> |
| 78 | #include <optional> |
| 79 | #include <utility> |
| 80 | #include <vector> |
| 81 | |
| 82 | #define DEBUG_TYPE "instcombine" |
| 83 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
| 84 | |
| 85 | using namespace llvm; |
| 86 | using namespace PatternMatch; |
| 87 | |
// Counts call sites that InstCombine simplified into simpler IR; reported
// under -stats.
STATISTIC(NumSimplified, "Number of library calls simplified" );

// Command-line knob: how many instructions we are willing to scan past while
// searching for an adjacent guard intrinsic to merge with (guard widening).
static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window" ,
    cl::init(Val: 3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard" ));
| 95 | |
| 96 | /// Return the specified type promoted as it would be to pass though a va_arg |
| 97 | /// area. |
| 98 | static Type *getPromotedType(Type *Ty) { |
| 99 | if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) { |
| 100 | if (ITy->getBitWidth() < 32) |
| 101 | return Type::getInt32Ty(C&: Ty->getContext()); |
| 102 | } |
| 103 | return Ty; |
| 104 | } |
| 105 | |
| 106 | /// Recognize a memcpy/memmove from a trivially otherwise unused alloca. |
| 107 | /// TODO: This should probably be integrated with visitAllocSites, but that |
| 108 | /// requires a deeper change to allow either unread or unwritten objects. |
| 109 | static bool hasUndefSource(AnyMemTransferInst *MI) { |
| 110 | auto *Src = MI->getRawSource(); |
| 111 | while (isa<GetElementPtrInst>(Val: Src)) { |
| 112 | if (!Src->hasOneUse()) |
| 113 | return false; |
| 114 | Src = cast<Instruction>(Val: Src)->getOperand(i: 0); |
| 115 | } |
| 116 | return isa<AllocaInst>(Val: Src) && Src->hasOneUse(); |
| 117 | } |
| 118 | |
/// Simplify a memcpy/memmove (plain or atomic). In order of preference:
/// improve the recorded alignments in place, delete provably-noop transfers
/// (store to known-constant memory, or load of provable undef), and finally
/// turn a 1/2/4/8-byte constant-length transfer into a single load+store
/// pair. Returning MI (after mutating it) signals a change was made;
/// returning nullptr means no transform applied.
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  // Raise the intrinsic's destination alignment to whatever we can prove.
  Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
  MaybeAlign CopyDstAlign = MI->getDestAlign();
  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  // Likewise for the source alignment.
  Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
  MaybeAlign CopySrcAlign = MI->getSourceAlign();
  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength((uint64_t)0);
    return MI;
  }

  // If the source is provably undef, the memcpy/memmove doesn't do anything
  // (unless the transfer is volatile).
  if (hasUndefSource(MI) && !MI->isVolatile()) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength((uint64_t)0);
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already." );

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (MI->isAtomic())
    if (*CopyDstAlign < Size || *CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<3);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA, scope and noalias tags describing our copy.
  AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);

  // Build the replacement load, carrying over the intrinsic's alignment and
  // the metadata kinds that remain valid on a scalar memory access.
  Value *Src = MI->getArgOperand(i: 1);
  Value *Dest = MI->getArgOperand(i: 0);
  LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(*CopySrcAlign);
  L->setAAMetadata(AACopyMD);
  MDNode *LoopMemParallelMD =
      MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);

  // Mirror the same metadata onto the replacement store.
  StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(*CopyDstAlign);
  S->setAAMetadata(AACopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
  S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);

  if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (MI->isAtomic()) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength((uint64_t)0);
  return MI;
}
| 220 | |
/// Simplify a memset (plain or atomic): improve the recorded destination
/// alignment, delete provably-noop memsets (constant memory, or undef fill
/// value), and turn a 1/2/4/8-byte constant-length memset of a constant byte
/// into a single store of a splatted integer. Returning MI (after mutating
/// it) signals a change; nullptr means no transform applied.
Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  // Raise the intrinsic's destination alignment to whatever we can prove.
  const Align KnownAlignment =
      getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
  MaybeAlign MemSetAlign = MI->getDestAlign();
  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength((uint64_t)0);
    return MI;
  }

  // Remove memset with an undef value.
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(Val: MI->getValue())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength((uint64_t)0);
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(Bitwidth: 8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory setting should be removed already." );
  const Align Alignment = MI->getDestAlign().valueOrOne();

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (MI->isAtomic() && Alignment < Len)
    return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32(Value: (uint32_t)Len)) {
    Value *Dest = MI->getDest();

    // Extract the fill value and store. The fill byte is splatted across an
    // integer of Len*8 bits so a single store writes the whole region.
    Constant *FillVal = ConstantInt::get(
        Context&: MI->getContext(), V: APInt::getSplat(NewLen: Len * 8, V: FillC->getValue()));
    StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
    S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
    // Keep dbg.assign records pointing at the value actually stored.
    for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(Inst: S)) {
      if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: FillC))
        DbgAssign->replaceVariableLocationOp(OldValue: FillC, NewValue: FillVal);
    }

    S->setAlignment(Alignment);
    if (MI->isAtomic())
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength((uint64_t)0);
    return MI;
  }

  return nullptr;
}
| 289 | |
| 290 | // TODO, Obvious Missing Transforms: |
| 291 | // * Narrow width by halfs excluding zero/undef lanes |
| 292 | Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) { |
| 293 | Value *LoadPtr = II.getArgOperand(i: 0); |
| 294 | const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne(); |
| 295 | |
| 296 | // If the mask is all ones or undefs, this is a plain vector load of the 1st |
| 297 | // argument. |
| 298 | if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: 1))) { |
| 299 | LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
| 300 | Name: "unmaskedload" ); |
| 301 | L->copyMetadata(SrcInst: II); |
| 302 | return L; |
| 303 | } |
| 304 | |
| 305 | // If we can unconditionally load from this address, replace with a |
| 306 | // load/select idiom. TODO: use DT for context sensitive query |
| 307 | if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(), |
| 308 | DL: II.getDataLayout(), CtxI: &II, AC: &AC)) { |
| 309 | LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment, |
| 310 | Name: "unmaskedload" ); |
| 311 | LI->copyMetadata(SrcInst: II); |
| 312 | return Builder.CreateSelect(C: II.getArgOperand(i: 1), True: LI, False: II.getArgOperand(i: 2)); |
| 313 | } |
| 314 | |
| 315 | return nullptr; |
| 316 | } |
| 317 | |
// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
//
// Simplify a masked.store with a constant mask: all-zero masks are dead,
// all-ones masks become a plain store, and for fixed-width vectors the
// known-inactive lanes are used to simplify the stored value.
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
  Value *StorePtr = II.getArgOperand(i: 1);
  Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne();
  // All transforms below require a constant mask.
  auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (maskIsAllZeroOrUndef(Mask: ConstMask))
    return eraseInstFromFunction(I&: II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (maskIsAllOneOrUndef(Mask: ConstMask)) {
    StoreInst *S =
        new StoreInst(II.getArgOperand(i: 0), StorePtr, false, Alignment);
    S->copyMetadata(SrcInst: II);
    return S;
  }

  // Demanded-elements analysis below assumes a fixed element count.
  if (isa<ScalableVectorType>(Val: ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
  if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts,
                                            PoisonElts))
    return replaceOperand(I&: II, OpNum: 0, V);

  return nullptr;
}
| 352 | |
| 353 | // TODO, Obvious Missing Transforms: |
| 354 | // * Single constant active lane load -> load |
| 355 | // * Dereferenceable address & few lanes -> scalarize speculative load/selects |
| 356 | // * Adjacent vector addresses -> masked.load |
| 357 | // * Narrow width by halfs excluding zero/undef lanes |
| 358 | // * Vector incrementing address -> vector masked load |
| 359 | Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { |
| 360 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 1)); |
| 361 | if (!ConstMask) |
| 362 | return nullptr; |
| 363 | |
| 364 | // Vector splat address w/known mask -> scalar load |
| 365 | // Fold the gather to load the source vector first lane |
| 366 | // because it is reloading the same value each time |
| 367 | if (ConstMask->isAllOnesValue()) |
| 368 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 0))) { |
| 369 | auto *VecTy = cast<VectorType>(Val: II.getType()); |
| 370 | const Align Alignment = II.getParamAlign(ArgNo: 0).valueOrOne(); |
| 371 | LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr, |
| 372 | Align: Alignment, Name: "load.scalar" ); |
| 373 | Value *Shuf = |
| 374 | Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast" ); |
| 375 | return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf)); |
| 376 | } |
| 377 | |
| 378 | return nullptr; |
| 379 | } |
| 380 | |
| 381 | // TODO, Obvious Missing Transforms: |
| 382 | // * Single constant active lane -> store |
| 383 | // * Adjacent vector addresses -> masked.store |
| 384 | // * Narrow store width by halfs excluding zero/undef lanes |
| 385 | // * Vector incrementing address -> vector masked store |
| 386 | Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { |
| 387 | auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: 2)); |
| 388 | if (!ConstMask) |
| 389 | return nullptr; |
| 390 | |
| 391 | // If the mask is all zeros, a scatter does nothing. |
| 392 | if (maskIsAllZeroOrUndef(Mask: ConstMask)) |
| 393 | return eraseInstFromFunction(I&: II); |
| 394 | |
| 395 | // Vector splat address -> scalar store |
| 396 | if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: 1))) { |
| 397 | // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr |
| 398 | if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: 0))) { |
| 399 | if (maskContainsAllOneOrUndef(Mask: ConstMask)) { |
| 400 | Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne(); |
| 401 | StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, |
| 402 | Alignment); |
| 403 | S->copyMetadata(SrcInst: II); |
| 404 | return S; |
| 405 | } |
| 406 | } |
| 407 | // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, |
| 408 | // lastlane), ptr |
| 409 | if (ConstMask->isAllOnesValue()) { |
| 410 | Align Alignment = II.getParamAlign(ArgNo: 1).valueOrOne(); |
| 411 | VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: 1)->getType()); |
| 412 | ElementCount VF = WideLoadTy->getElementCount(); |
| 413 | Value *RunTimeVF = Builder.CreateElementCount(Ty: Builder.getInt32Ty(), EC: VF); |
| 414 | Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: 1)); |
| 415 | Value * = |
| 416 | Builder.CreateExtractElement(Vec: II.getArgOperand(i: 0), Idx: LastLane); |
| 417 | StoreInst *S = |
| 418 | new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment); |
| 419 | S->copyMetadata(SrcInst: II); |
| 420 | return S; |
| 421 | } |
| 422 | } |
| 423 | if (isa<ScalableVectorType>(Val: ConstMask->getType())) |
| 424 | return nullptr; |
| 425 | |
| 426 | // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts |
| 427 | APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask); |
| 428 | APInt PoisonElts(DemandedElts.getBitWidth(), 0); |
| 429 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 0), DemandedElts, |
| 430 | PoisonElts)) |
| 431 | return replaceOperand(I&: II, OpNum: 0, V); |
| 432 | if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: 1), DemandedElts, |
| 433 | PoisonElts)) |
| 434 | return replaceOperand(I&: II, OpNum: 1, V); |
| 435 | |
| 436 | return nullptr; |
| 437 | } |
| 438 | |
/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {
  auto *Arg = II.getArgOperand(i: 0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = StrippedArg;
  // Peel off any chain of launder/strip intrinsics (with interleaved pointer
  // casts) to find the underlying pointer.
  while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
    if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
        Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
      break;
    StrippedInvariantGroupsArg = Intr->getArgOperand(i: 0)->stripPointerCasts();
  }
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr;  // No launders/strips to remove.

  Value *Result = nullptr;

  // Re-emit a single intrinsic of the outermost kind on the peeled pointer.
  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip" );
  // Stripping casts may have changed the address space; cast back so the
  // replacement's type matches the original intrinsic's result type.
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());

  return cast<Instruction>(Val: Result);
}
| 476 | |
/// Fold llvm.cttz / llvm.ctlz intrinsics: peephole rewrites through
/// bitreverse/zext/sext/shifts/abs, i1 specialization, power-of-2 log
/// folding, and known-bits based constant folding / range annotation.
/// Returns a replacement instruction, the mutated intrinsic, or nullptr.
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic" );
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(i: 0);
  // Op1 is the 'is_zero_poison' flag (i1 constant 0 or 1).
  Value *Op1 = II.getArgOperand(i: 1);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F =
        Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
    return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: 1)});
  }

  if (II.getType()->isIntOrIntVectorTy(BitWidth: 1)) {
    // ctlz/cttz i1 Op0 --> not Op0
    if (match(V: Op1, P: m_Zero()))
      return BinaryOperator::CreateNot(Op: Op0);
    // If zero is poison, then the input can be assumed to be "true", so the
    // instruction simplifies to "false".
    assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1" );
    return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
  }

  // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
  if (II.hasOneUse() && match(V: Op1, P: m_Zero()) &&
      match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II)))) {
    // Strengthening the flag may invalidate UB-implying attrs/metadata.
    II.dropUBImplyingAttrsAndMetadata();
    return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
  }

  Constant *C;

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
      return IC.replaceOperand(I&: II, OpNum: 0, V: X);

    // cttz(-x & x) -> cttz(x)
    if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
      return IC.replaceOperand(I&: II, OpNum: 0, V: X);

    // cttz(sext(x)) -> cttz(zext(x))
    if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
      auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
      auto *CttzZext =
          IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: Zext, RHS: Op1);
      return IC.replaceInstUsesWith(I&: II, V: CttzZext);
    }

    // Zext doesn't change the number of trailing zeros, so narrow:
    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
    if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
      auto *Cttz = IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: X,
                                                    RHS: IC.Builder.getTrue());
      auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
      return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS)
      return IC.replaceOperand(I&: II, OpNum: 0, V: X);

    // Same for the abs intrinsic form.
    if (match(V: Op0, P: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: X))))
      return IC.replaceOperand(I&: II, OpNum: 0, V: X);

    // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
    if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
        match(V: Op1, P: m_One())) {
      Value *ConstCttz =
          IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
      return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
    }

    // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
    if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
        match(V: Op1, P: m_One())) {
      Value *ConstCttz =
          IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::cttz, LHS: C, RHS: Op1);
      return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
    }

    // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
    if (match(V: Op0, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value(V&: X)), R: m_One()))) {
      Value *Width =
          ConstantInt::get(Ty: II.getType(), V: II.getType()->getScalarSizeInBits());
      return BinaryOperator::CreateSub(V1: Width, V2: X);
    }
  } else {
    // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
    if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
        match(V: Op1, P: m_One())) {
      Value *ConstCtlz =
          IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
      return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
    }

    // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
    if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
        match(V: Op1, P: m_One())) {
      Value *ConstCtlz =
          IC.Builder.CreateBinaryIntrinsic(ID: Intrinsic::ctlz, LHS: C, RHS: Op1);
      return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
    }

    // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
    if (Op0->hasOneUse() &&
        match(V: Op0,
              P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
      Type *Ty = II.getType();
      unsigned BitWidth = Ty->getScalarSizeInBits();
      auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
                                              Args: {X, IC.Builder.getFalse()});
      auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
      return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
    }
  }

  // cttz(Pow2) -> Log2(Pow2)
  // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
  if (auto *R = IC.tryGetLog2(Op: Op0, AssumeNonZero: match(V: Op1, P: m_One()))) {
    if (IsTZ)
      return IC.replaceInstUsesWith(I&: II, V: R);
    BinaryOperator *BO = BinaryOperator::CreateSub(
        V1: ConstantInt::get(Ty: R->getType(), V: R->getType()->getScalarSizeInBits() - 1),
        V2: R);
    BO->setHasNoSignedWrap();
    BO->setHasNoUnsignedWrap();
    return BO;
  }

  KnownBits Known = IC.computeKnownBits(V: Op0, CxtI: &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
    return IC.replaceInstUsesWith(I&: II, V: C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsPoison' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isZero() ||
      isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) {
    if (!match(V: II.getArgOperand(i: 1), P: m_One()))
      return IC.replaceOperand(I&: II, OpNum: 1, V: IC.Builder.getTrue());
  }

  // Add range attribute since known bits can't completely reflect what we know.
  unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
  if (BitWidth != 1 && !II.hasRetAttr(Kind: Attribute::Range) &&
      !II.getMetadata(KindID: LLVMContext::MD_range)) {
    ConstantRange Range(APInt(BitWidth, DefiniteZeros),
                        APInt(BitWidth, PossibleZeros + 1));
    II.addRangeRetAttr(CR: Range);
    return &II;
  }

  return nullptr;
}
| 652 | |
// Simplify llvm.ctpop (population count). Returns a replacement instruction,
// or null if no transform applies; may instead annotate the call with a
// tightened range attribute and return the call itself.
static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic" );
  Type *Ty = II.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Value *Op0 = II.getArgOperand(i: 0);
  Value *X, *Y;

  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  // Bit permutations do not change the number of set bits.
  if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) || match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
    return IC.replaceOperand(I&: II, OpNum: 0, V: X);

  // ctpop(rot(x)) -> ctpop(x)
  // A rotate is a funnel shift whose two value operands are the same; it also
  // only permutes bits.
  if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) ||
       match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
      X == Y)
    return IC.replaceOperand(I&: II, OpNum: 0, V: X);

  // ctpop(x | -x) -> bitwidth - cttz(x, false)
  // x | -x sets every bit from the lowest set bit of x upward.
  if (Op0->hasOneUse() &&
      match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
    auto *Cttz = IC.Builder.CreateIntrinsic(ID: Intrinsic::cttz, Types: Ty,
                                            Args: {X, IC.Builder.getFalse()});
    auto *Bw = ConstantInt::get(Ty, V: APInt(BitWidth, BitWidth));
    return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
  }

  // ctpop(~x & (x - 1)) -> cttz(x, false)
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  if (match(V: Op0,
            P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
    Function *F =
        Intrinsic::getOrInsertDeclaration(M: II.getModule(), id: Intrinsic::cttz, Tys: Ty);
    return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
  }

  // Zext doesn't change the number of set bits, so narrow:
  // ctpop (zext X) --> zext (ctpop X)
  if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
    Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: X);
    return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
  }

  KnownBits Known(BitWidth);
  IC.computeKnownBits(V: Op0, Known, CxtI: &II);

  // If all bits are zero except for exactly one fixed bit, then the result
  // must be 0 or 1, and we can get that answer by shifting to LSB:
  // ctpop (X & 32) --> (X & 32) >> 5
  // TODO: Investigate removing this as its likely unnecessary given the below
  // `isKnownToBeAPowerOfTwo` check.
  if ((~Known.Zero).isPowerOf2())
    return BinaryOperator::CreateLShr(
        V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));

  // More generally we can also handle non-constant power of 2 patterns such as
  // shl/shr(Pow2, X), (X & -X), etc... by transforming:
  // ctpop(Pow2OrZero) --> icmp ne X, 0
  if (IC.isKnownToBeAPowerOfTwo(V: Op0, /* OrZero */ true))
    return CastInst::Create(Instruction::ZExt,
                            S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
                                                   RHS: Constant::getNullValue(Ty)),
                            Ty);

  // Add range attribute since known bits can't completely reflect what we know.
  // Skip i1, where the bounds below would be degenerate.
  if (BitWidth != 1) {
    ConstantRange OldRange =
        II.getRange().value_or(u: ConstantRange::getFull(BitWidth));

    // Known bits bound the popcount from both sides; Upper is exclusive.
    unsigned Lower = Known.countMinPopulation();
    unsigned Upper = Known.countMaxPopulation() + 1;

    // A known-non-zero operand has at least one set bit, which known bits
    // alone may not have proven.
    if (Lower == 0 && OldRange.contains(Val: APInt::getZero(numBits: BitWidth)) &&
        isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II)))
      Lower = 1;

    ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
    Range = Range.intersectWith(CR: OldRange, Type: ConstantRange::Unsigned);

    // Only report a change when the attribute actually tightened.
    if (Range != OldRange) {
      II.addRangeRetAttr(CR: Range);
      return &II;
    }
  }

  return nullptr;
}
| 740 | |
/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
/// at most two source operands are actually referenced.
///
/// \p IsExtension distinguishes tbx (true, has a leading fallback-vector
/// operand) from tbl (false, out-of-range indices produce zero).
static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC,
                                    bool IsExtension) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: II.arg_size() - 1));
  if (!C)
    return nullptr;

  auto *RetTy = cast<FixedVectorType>(Val: II.getType());
  unsigned NumIndexes = RetTy->getNumElements();

  // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
  if (!RetTy->getElementType()->isIntegerTy(Bitwidth: 8) ||
      (NumIndexes != 8 && NumIndexes != 16))
    return nullptr;

  // For tbx instructions, the first argument is the "fallback" vector, which
  // has the same length as the mask and return type.
  unsigned int StartIndex = (unsigned)IsExtension;
  auto *SourceTy =
      cast<FixedVectorType>(Val: II.getArgOperand(i: StartIndex)->getType());
  // Note that the element count of each source vector does *not* need to be the
  // same as the element count of the return type and mask! All source vectors
  // must have the same element count as each other, though.
  unsigned NumElementsPerSource = SourceTy->getNumElements();

  // There are no tbl/tbx intrinsics for which the destination size exceeds the
  // source size. However, our definitions of the intrinsics, at least in
  // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
  // *could* technically happen.
  if (NumIndexes > NumElementsPerSource)
    return nullptr;

  // The tbl/tbx intrinsics take several source operands followed by a mask
  // operand.
  unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;

  // Map input operands to shuffle indices. This also helpfully deduplicates the
  // input arguments, in case the same value is passed as an argument multiple
  // times.
  SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
  // A shufflevector has exactly two vector operands; unused slots stay poison.
  Value *ShuffleOperands[2] = {PoisonValue::get(T: SourceTy),
                               PoisonValue::get(T: SourceTy)};

  // NumIndexes is at most 16 (checked above), so a fixed array suffices.
  int Indexes[16];
  for (unsigned I = 0; I < NumIndexes; ++I) {
    Constant *COp = C->getAggregateElement(Elt: I);

    // Each mask element must be a constant integer or undef.
    if (!COp || (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
      return nullptr;

    // Undef mask elements map to the shufflevector's "don't care" index.
    if (isa<UndefValue>(Val: COp)) {
      Indexes[I] = -1;
      continue;
    }

    uint64_t Index = cast<ConstantInt>(Val: COp)->getZExtValue();
    // The index of the input argument that this index references (0 = first
    // source argument, etc).
    unsigned SourceOperandIndex = Index / NumElementsPerSource;
    // The index of the element at that source operand.
    unsigned SourceOperandElementIndex = Index % NumElementsPerSource;

    Value *SourceOperand;
    if (SourceOperandIndex >= NumSourceOperands) {
      // This index is out of bounds. Map it to index into either the fallback
      // vector (tbx) or vector of zeroes (tbl).
      SourceOperandIndex = NumSourceOperands;
      if (IsExtension) {
        // For out-of-bounds indices in tbx, choose the `I`th element of the
        // fallback.
        SourceOperand = II.getArgOperand(i: 0);
        SourceOperandElementIndex = I;
      } else {
        // Otherwise, choose some element from the dummy vector of zeroes (we'll
        // always choose the first).
        SourceOperand = Constant::getNullValue(Ty: SourceTy);
        SourceOperandElementIndex = 0;
      }
    } else {
      SourceOperand = II.getArgOperand(i: SourceOperandIndex + StartIndex);
    }

    // The source operand may be the fallback vector, which may not have the
    // same number of elements as the source vector. In that case, we *could*
    // choose to extend its length with another shufflevector, but it's simpler
    // to just bail instead.
    if (cast<FixedVectorType>(Val: SourceOperand->getType())->getNumElements() !=
        NumElementsPerSource)
      return nullptr;

    // We now know the source operand referenced by this index. Make it a
    // shufflevector operand, if it isn't already.
    unsigned NumSlots = ValueToShuffleSlot.size();
    // This shuffle references more than two sources, and hence cannot be
    // represented as a shufflevector.
    if (NumSlots == 2 && !ValueToShuffleSlot.contains(Val: SourceOperand))
      return nullptr;

    auto [It, Inserted] =
        ValueToShuffleSlot.try_emplace(Key: SourceOperand, Args&: NumSlots);
    if (Inserted)
      ShuffleOperands[It->getSecond()] = SourceOperand;

    // Rebase the element index onto the slot this source was assigned to.
    unsigned RemappedIndex =
        (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
    Indexes[I] = RemappedIndex;
  }

  Value *Shuf = IC.Builder.CreateShuffleVector(
      V1: ShuffleOperands[0], V2: ShuffleOperands[1], Mask: ArrayRef(Indexes, NumIndexes));
  return IC.replaceInstUsesWith(I&: II, V: Shuf);
}
| 855 | |
| 856 | // Returns true iff the 2 intrinsics have the same operands, limiting the |
| 857 | // comparison to the first NumOperands. |
| 858 | static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, |
| 859 | unsigned NumOperands) { |
| 860 | assert(I.arg_size() >= NumOperands && "Not enough operands" ); |
| 861 | assert(E.arg_size() >= NumOperands && "Not enough operands" ); |
| 862 | for (unsigned i = 0; i < NumOperands; i++) |
| 863 | if (I.getArgOperand(i) != E.getArgOperand(i)) |
| 864 | return false; |
| 865 | return true; |
| 866 | } |
| 867 | |
// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0) ; &I
//
// \p IsStart classifies an intrinsic as a matching "start". Returns true and
// erases both instructions when an empty range was removed.
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {
  // We start from the end intrinsic and scan backwards, so that InstCombine
  // has already processed (and potentially removed) all the instructions
  // before the end intrinsic.
  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
  for (; BI != BE; ++BI) {
    if (auto *I = dyn_cast<IntrinsicInst>(Val: &*BI)) {
      // Debug/pseudo instructions and other 'end' intrinsics of the same
      // kind are transparent to this scan.
      if (I->isDebugOrPseudoInst() ||
          I->getIntrinsicID() == EndI.getIntrinsicID())
        continue;
      if (IsStart(*I)) {
        // A start with identical operands forms an empty range with EndI:
        // delete both.
        if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
          IC.eraseInstFromFunction(I&: *I);
          IC.eraseInstFromFunction(I&: EndI);
          return true;
        }
        // Skip start intrinsics that don't pair with this end intrinsic.
        continue;
      }
    }
    // Any other instruction terminates the search.
    break;
  }

  return false;
}
| 904 | |
// Erase a va_start/va_copy whose va_list is immediately destroyed by this
// va_end (an empty range); the call itself is never replaced, so this always
// returns null.
Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
  removeTriviallyEmptyRange(EndI&: I, IC&: *this, IsStart: [&I](const IntrinsicInst &II) {
    // Bail out on the case where the source va_list of a va_copy is destroyed
    // immediately by a follow-up va_end.
    return II.getIntrinsicID() == Intrinsic::vastart ||
           (II.getIntrinsicID() == Intrinsic::vacopy &&
            I.getArgOperand(i: 0) != II.getArgOperand(i: 1));
  });
  return nullptr;
}
| 915 | |
| 916 | static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { |
| 917 | assert(Call.arg_size() > 1 && "Need at least 2 args to swap" ); |
| 918 | Value *Arg0 = Call.getArgOperand(i: 0), *Arg1 = Call.getArgOperand(i: 1); |
| 919 | if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) { |
| 920 | Call.setArgOperand(i: 0, v: Arg1); |
| 921 | Call.setArgOperand(i: 1, v: Arg0); |
| 922 | return &Call; |
| 923 | } |
| 924 | return nullptr; |
| 925 | } |
| 926 | |
| 927 | /// Creates a result tuple for an overflow intrinsic \p II with a given |
| 928 | /// \p Result and a constant \p Overflow value. |
| 929 | static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result, |
| 930 | Constant *Overflow) { |
| 931 | Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow}; |
| 932 | StructType *ST = cast<StructType>(Val: II->getType()); |
| 933 | Constant *Struct = ConstantStruct::get(T: ST, V); |
| 934 | return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: 0); |
| 935 | } |
| 936 | |
// Common folding for the *.with.overflow intrinsics: either prove the
// overflow bit statically, or use assume() facts to drop the overflow check.
// Returns a replacement tuple instruction, or null.
Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  // First, try to compute the overflow bit (and possibly the result) directly.
  if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
                            RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
    return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);

  // See whether we can optimize the overflow check with assumption information.
  for (User *U : WO->users()) {
    // Only the extracted overflow bit (struct element 1) can be assumed on.
    if (!match(V: U, P: m_ExtractValue<1>(V: m_Value())))
      continue;

    for (auto &AssumeVH : AC.assumptionsFor(V: U)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(Val&: AssumeVH);
      // Look for assume(!overflow-bit).
      if (!match(V: I->getArgOperand(i: 0), P: m_Not(V: m_Specific(V: U))))
        continue;
      // The assume must dominate/apply at the intrinsic's position.
      if (!isValidAssumeForContext(I, CxtI: II, /*DT=*/nullptr,
                                   /*AllowEphemerals=*/true))
        continue;
      // No overflow is guaranteed: emit the plain binop with the matching
      // no-wrap flag and a constant-false overflow bit.
      Value *Result =
          Builder.CreateBinOp(Opc: WO->getBinaryOp(), LHS: WO->getLHS(), RHS: WO->getRHS());
      Result->takeName(V: WO);
      if (auto *Inst = dyn_cast<Instruction>(Val: Result)) {
        if (WO->isSigned())
          Inst->setHasNoSignedWrap();
        else
          Inst->setHasNoUnsignedWrap();
      }
      return createOverflowTuple(II: WO, Result,
                                 Overflow: ConstantInt::getFalse(Ty: U->getType()));
    }
  }

  return nullptr;
}
| 976 | |
| 977 | static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { |
| 978 | Ty = Ty->getScalarType(); |
| 979 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE; |
| 980 | } |
| 981 | |
| 982 | static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) { |
| 983 | Ty = Ty->getScalarType(); |
| 984 | return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero(); |
| 985 | } |
| 986 | |
/// \returns the compare predicate type if the test performed by
/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
/// floating-point environment assumed for \p F for type \p Ty
///
/// Whether subnormals count as "zero" depends on the function's denormal
/// input mode, hence the IEEE/DAZ split per mask below.
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
                                              const Function &F, Type *Ty) {
  switch (static_cast<unsigned>(Mask)) {
  case fcZero:
    // Only true zeros compare oeq 0.0 when denormal inputs are IEEE.
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcZero | fcSubnormal:
    // With DAZ, subnormal inputs also compare equal to 0.0.
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OEQ;
    break;
  case fcPositive | fcNegZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPositive | fcNegZero | fcNegSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGE;
    break;
  case fcPosSubnormal | fcPosNormal | fcPosInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegative | fcPosZero:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegative | fcPosZero | fcPosSubnormal:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLE;
    break;
  case fcNegSubnormal | fcNegNormal | fcNegInf:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case fcPosNormal | fcPosInf:
    // With DAZ, positive subnormals read as zero, so "> 0" excludes them.
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OGT;
    break;
  case fcNegNormal | fcNegInf:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_OLT;
    break;
  case ~fcZero & ~fcNan:
    if (inputDenormalIsIEEE(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  case ~(fcZero | fcSubnormal) & ~fcNan:
    if (inputDenormalIsDAZ(F, Ty))
      return FCmpInst::FCMP_ONE;
    break;
  default:
    break;
  }

  // No single ordered compare-with-zero predicate matches this mask.
  return FCmpInst::BAD_FCMP_PREDICATE;
}
| 1047 | |
// Fold llvm.is.fpclass: push the mask through fneg/fabs, rewrite to fcmp
// against +/-inf or 0.0 where the FP environment allows, and shrink the mask
// using known floating-point classes of the source.
Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
  Value *Src0 = II.getArgOperand(i: 0);
  Value *Src1 = II.getArgOperand(i: 1);
  // The class-mask operand is required to be an immediate constant.
  const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
  FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
  const bool IsUnordered = (Mask & fcNan) == fcNan;
  const bool IsOrdered = (Mask & fcNan) == fcNone;
  const FPClassTest OrderedMask = Mask & ~fcNan;
  const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;

  // Under strict FP, fcmp may observe/raise FP exceptions, so the fcmp
  // rewrites below must be suppressed.
  const bool IsStrict =
      II.getFunction()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP);

  Value *FNegSrc;
  if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
    // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)

    II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
    return replaceOperand(I&: II, OpNum: 0, V: FNegSrc);
  }

  Value *FAbsSrc;
  if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
    // is.fpclass (fabs x), mask -> is.fpclass x, (inverse_fabs mask)
    II.setArgOperand(i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
    return replaceOperand(I&: II, OpNum: 0, V: FAbsSrc);
  }

  if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
    // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
    // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
    // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
    Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
    FCmpInst::Predicate Pred =
        IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
    if (OrderedInvertedMask == fcInf)
      Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;

    Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Src0);
    Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
    CmpInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: CmpInf);
  }

  if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
    // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
    // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
    // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
    Constant *Inf =
        ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
    Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
                               : Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);

    EqInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: EqInf);
  }

  if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
      (IsOrdered || IsUnordered) && !IsStrict) {
    // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
    // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
    // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
    // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
    Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
                                            Negative: OrderedInvertedMask == fcNegInf);
    Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
                               : Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
    NeInf->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: NeInf);
  }

  if (Mask == fcNan && !IsStrict) {
    // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
    // exceptions.
    Value *IsNan =
        Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
    IsNan->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: IsNan);
  }

  if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
    // Equivalent of !isnan. Replace with standard fcmp.
    Value *FCmp =
        Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
    FCmp->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: FCmp);
  }

  FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;

  // Try to replace with an fcmp with 0
  //
  // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
  // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
  // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
  // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
  //
  // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
  // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
  //
  // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
  // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
  //
  if (!IsStrict && (IsOrdered || IsUnordered) &&
      (PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
                                     Ty: Src0->getType())) !=
          FCmpInst::BAD_FCMP_PREDICATE) {
    Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
    // Equivalent of == 0.
    // If the mask included fcNan, use the unordered form of the predicate.
    Value *FCmp = Builder.CreateFCmp(
        P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
        LHS: Src0, RHS: Zero);

    FCmp->takeName(V: &II);
    return replaceInstUsesWith(I&: II, V: FCmp);
  }

  KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II);

  // Clear test bits we know must be false from the source value.
  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
  // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
  if ((Mask & Known.KnownFPClasses) != Mask) {
    II.setArgOperand(
        i: 1, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
    return &II;
  }

  // If none of the tests which can return false are possible, fold to true.
  // fp_class (nnan x), ~(qnan|snan) -> true
  // fp_class (ninf x), ~(ninf|pinf) -> true
  if (Mask == Known.KnownFPClasses)
    return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));

  return nullptr;
}
| 1187 | |
/// Determine the sign of \p Op: true if known negative, false if known
/// non-negative, std::nullopt if unknown.
static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
  KnownBits Known = computeKnownBits(V: Op, Q: SQ);
  if (Known.isNonNegative())
    return false;
  if (Known.isNegative())
    return true;

  // For X - Y with nsw, a dominating "X < Y" condition decides the sign
  // (true -> negative result, false -> non-negative).
  Value *X, *Y;
  if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
    return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);

  return std::nullopt;
}
| 1201 | |
/// Like getKnownSign, but treats zero as "negative" for the nsw-sub case:
/// true if \p Op is known negative-or-zero via a dominating X <= Y condition,
/// false if known strictly non-negative, std::nullopt if unknown.
static std::optional<bool> getKnownSignOrZero(Value *Op,
                                              const SimplifyQuery &SQ) {
  // A definite sign answer also answers sign-or-zero.
  if (std::optional<bool> Sign = getKnownSign(Op, SQ))
    return Sign;

  // For X - Y with nsw, a dominating "X <= Y" condition implies the result
  // is <= 0.
  Value *X, *Y;
  if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
    return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: SQ.CxtI, DL: SQ.DL);

  return std::nullopt;
}
| 1213 | |
| 1214 | /// Return true if two values \p Op0 and \p Op1 are known to have the same sign. |
| 1215 | static bool signBitMustBeTheSame(Value *Op0, Value *Op1, |
| 1216 | const SimplifyQuery &SQ) { |
| 1217 | std::optional<bool> Known1 = getKnownSign(Op: Op1, SQ); |
| 1218 | if (!Known1) |
| 1219 | return false; |
| 1220 | std::optional<bool> Known0 = getKnownSign(Op: Op0, SQ); |
| 1221 | if (!Known0) |
| 1222 | return false; |
| 1223 | return *Known0 == *Known1; |
| 1224 | } |
| 1225 | |
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
         "Expected a min or max intrinsic" );

  // TODO: Match vectors with undef elements, but undef may not propagate.
  Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1);
  Value *X;
  const APInt *C0, *C1;
  // The add must be one-use, or we would duplicate it.
  if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) ||
      !match(V: Op1, P: m_APInt(Res&: C1)))
    return nullptr;

  // Check for necessary no-wrap and overflow constraints.
  // Signed min/max needs nsw, unsigned needs nuw, for the rewrite to hold.
  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
  auto *Add = cast<BinaryOperator>(Val: Op0);
  if ((IsSigned && !Add->hasNoSignedWrap()) ||
      (!IsSigned && !Add->hasNoUnsignedWrap()))
    return nullptr;

  // If the constant difference overflows, then instsimplify should reduce the
  // min/max to the add or C1.
  bool Overflow;
  APInt CDiff =
      IsSigned ? C1->ssub_ov(RHS: *C0, Overflow) : C1->usub_ov(RHS: *C0, Overflow);
  assert(!Overflow && "Expected simplify of min/max" );

  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
  // Note: the "mismatched" no-overflow setting does not propagate.
  Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
  Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
  return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1))
                  : BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: 1));
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
/// Emits a truncated saturating intrinsic followed by a sext, or returns null.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
    if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
      return nullptr;
  } else if (match(V: &MinMax1,
                   P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
    if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  // The bounds must be a symmetric signed range: [-2^k, 2^k - 1].
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetics?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 0), CxtI: AddSub) > NewBitWidth ||
      ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: 1), CxtI: AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 0), DestTy: NewTy);
  Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: 1), DestTy: NewTy);
  Value *Sat = Builder.CreateIntrinsic(ID: IntrinsicID, Types: NewTy, Args: {AT, BT});
  return CastInst::Create(Instruction::SExt, S: Sat, Ty);
}
| 1322 | |
| 1323 | |
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {
  Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1);
  Value *X;
  const APInt *C0, *C1;
  // The inner min/max must be one-use so it disappears after the fold.
  if (!match(V: I1, P: m_APInt(Res&: C1)) || !I0->hasOneUse())
    return nullptr;

  // The constants must be adjacent (differ by exactly 1) so the clamp output
  // is limited to the two values C0 and C1.
  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  switch (II->getIntrinsicID()) {
  case Intrinsic::smax:
    if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_SGT;
    break;
  case Intrinsic::smin:
    if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_SLT;
    break;
  case Intrinsic::umax:
    if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C0 == *C1 + 1)
      Pred = ICmpInst::ICMP_UGT;
    break;
  case Intrinsic::umin:
    if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && *C1 == *C0 + 1)
      Pred = ICmpInst::ICMP_ULT;
    break;
  default:
    llvm_unreachable("Expected min/max intrinsic" );
  }
  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
    return nullptr;

  // max (min X, 42), 41 --> X > 41 ? 42 : 41
  // min (max X, 42), 43 --> X < 43 ? 42 : 43
  Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
  return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
}
| 1364 | |
| 1365 | /// If this min/max has a constant operand and an operand that is a matching |
| 1366 | /// min/max with a constant operand, constant-fold the 2 constant operands. |
| 1367 | static Value *reassociateMinMaxWithConstants(IntrinsicInst *II, |
| 1368 | IRBuilderBase &Builder, |
| 1369 | const SimplifyQuery &SQ) { |
| 1370 | Intrinsic::ID MinMaxID = II->getIntrinsicID(); |
| 1371 | auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: 0)); |
| 1372 | if (!LHS) |
| 1373 | return nullptr; |
| 1374 | |
| 1375 | Constant *C0, *C1; |
| 1376 | if (!match(V: LHS->getArgOperand(i: 1), P: m_ImmConstant(C&: C0)) || |
| 1377 | !match(V: II->getArgOperand(i: 1), P: m_ImmConstant(C&: C1))) |
| 1378 | return nullptr; |
| 1379 | |
| 1380 | // max (max X, C0), C1 --> max X, (max C0, C1) |
| 1381 | // min (min X, C0), C1 --> min X, (min C0, C1) |
| 1382 | // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1) |
| 1383 | // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1) |
| 1384 | Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID(); |
| 1385 | if (InnerMinMaxID != MinMaxID && |
| 1386 | !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) || |
| 1387 | (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) && |
| 1388 | isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ))) |
| 1389 | return nullptr; |
| 1390 | |
| 1391 | ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID); |
| 1392 | Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1); |
| 1393 | Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1); |
| 1394 | return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(), |
| 1395 | Args: {LHS->getArgOperand(i: 0), NewC}); |
| 1396 | } |
| 1397 | |
/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  // Match and capture a min/max operand candidate. The inner min/max must be
  // one-use so the reassociation does not increase the instruction count.
  Value *X, *Y;
  Constant *C;
  Instruction *Inner;
  if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
                          L: m_Instruction(I&: Inner),
                          R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
                      R: m_Value(V&: Y))))
    return nullptr;

  // The inner op must match. Check for constants to avoid infinite loops.
  // (A constant X or Y would let this fold and the constant-reassociation
  // fold keep rewriting each other's output.)
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
      match(V: X, P: m_ImmConstant()) || match(V: Y, P: m_ImmConstant()))
    return nullptr;

  // max (max X, C), Y --> max (max X, Y), C
  // Build the new inner op from the two non-constant operands and return an
  // unparented outer call for the caller to insert in place of II.
  Function *MinMax = Intrinsic::getOrInsertDeclaration(M: II->getModule(),
                                                       id: MinMaxID, Tys: II->getType());
  Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
  NewInner->takeName(V: Inner);
  return CallInst::Create(Func: MinMax, Args: {NewInner, C});
}
| 1427 | |
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0));
  auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 1));
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  // At least one inner op must be one-use so the fold removes an instruction
  // rather than just adding a new one.
  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
      RHS->getIntrinsicID() != MinMaxID ||
      (!LHS->hasOneUse() && !RHS->hasOneUse()))
    return nullptr;

  Value *A = LHS->getArgOperand(i: 0);
  Value *B = LHS->getArgOperand(i: 1);
  Value *C = RHS->getArgOperand(i: 0);
  Value *D = RHS->getArgOperand(i: 1);

  // Look for a common operand.
  Value *MinMaxOp = nullptr;
  Value *ThirdOp = nullptr;
  if (LHS->hasOneUse()) {
    // If the LHS is only used in this chain and the RHS is used outside of it,
    // reuse the RHS min/max because that will eliminate the LHS.
    if (D == A || C == A) {
      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
      MinMaxOp = RHS;
      ThirdOp = B;
    } else if (D == B || C == B) {
      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
      MinMaxOp = RHS;
      ThirdOp = A;
    }
  } else {
    assert(RHS->hasOneUse() && "Expected one-use operand" );
    // Reuse the LHS. This will eliminate the RHS.
    if (D == A || D == B) {
      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
      MinMaxOp = LHS;
      ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  }

  if (!MinMaxOp || !ThirdOp)
    return nullptr;

  // Return an unparented call; the caller inserts it in place of II.
  Module *Mod = II->getModule();
  Function *MinMax =
      Intrinsic::getOrInsertDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
  return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
}
| 1485 | |
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
Instruction *
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
  // The intrinsic must produce a vector, operate lanewise (trivially
  // vectorizable), and be speculatable, since we re-order it with a shuffle.
  if (!II->getType()->isVectorTy() ||
      !isTriviallyVectorizable(ID: II->getIntrinsicID()) ||
      !II->getCalledFunction()->isSpeculatable())
    return nullptr;

  // Find the first argument that is neither a constant nor a scalar operand:
  // it must be a unary shuffle, and its mask becomes the reference mask that
  // all other vector arguments are checked against.
  Value *X;
  Constant *C;
  ArrayRef<int> Mask;
  auto *NonConstArg = find_if_not(Range: II->args(), P: [&II](Use &Arg) {
    return isa<Constant>(Val: Arg.get()) ||
           isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                              ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr);
  });
  if (!NonConstArg ||
      !match(V: NonConstArg, P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_Mask(Mask))))
    return nullptr;

  // At least 1 operand must be a shuffle with 1 use because we are creating 2
  // instructions.
  if (none_of(Range: II->args(), P: match_fn(P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(), v2: m_Value())))))
    return nullptr;

  // See if all arguments are shuffled with the same mask.
  SmallVector<Value *, 4> NewArgs;
  Type *SrcTy = X->getType();
  for (Use &Arg : II->args()) {
    if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                           ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
      // Scalar operands are not shuffled; pass them through unchanged.
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg,
                   P: m_Shuffle(v1: m_Value(V&: X), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
             X->getType() == SrcTy)
      NewArgs.push_back(Elt: X);
    else if (match(V: &Arg, P: m_ImmConstant(C))) {
      // If it's a constant, try find the constant that would be shuffled to C.
      if (Constant *ShuffledC =
              unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: SrcTy)))
        NewArgs.push_back(Elt: ShuffledC);
      else
        return nullptr;
    } else
      return nullptr;
  }

  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  // FMFSource propagates fast-math flags when II is an FP operation.
  Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
  // Result type might be a different vector width.
  // TODO: Check that the result type isn't widened?
  VectorType *ResTy =
      VectorType::get(ElementType: II->getType()->getScalarType(), Other: cast<VectorType>(Val: SrcTy));
  Value *NewIntrinsic =
      Builder.CreateIntrinsic(RetTy: ResTy, ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
  return new ShuffleVectorInst(NewIntrinsic, Mask);
}
| 1544 | |
/// If all arguments of the intrinsic are reverses, try to pull the reverse
/// after the intrinsic.
Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
  // Only lanewise (trivially vectorizable) intrinsics commute with a lane
  // reversal.
  if (!isTriviallyVectorizable(ID: II->getIntrinsicID()))
    return nullptr;

  // At least 1 operand must be a reverse with 1 use because we are creating 2
  // instructions.
  if (none_of(Range: II->args(), P: [](Value *V) {
        return match(V, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value())));
      }))
    return nullptr;

  // Every argument must be a reverse (unwrap it), a scalar operand (pass
  // through), a splat (invariant under reversal), or an immediate constant
  // (reverse it now at compile time).
  Value *X;
  Constant *C;
  SmallVector<Value *> NewArgs;
  for (Use &Arg : II->args()) {
    if (isVectorIntrinsicWithScalarOpAtArg(ID: II->getIntrinsicID(),
                                           ScalarOpdIdx: Arg.getOperandNo(), TTI: nullptr))
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg, P: m_VecReverse(Op0: m_Value(V&: X))))
      NewArgs.push_back(Elt: X);
    else if (isSplatValue(V: Arg))
      NewArgs.push_back(Elt: Arg);
    else if (match(V: &Arg, P: m_ImmConstant(C)))
      NewArgs.push_back(Elt: Builder.CreateVectorReverse(V: C));
    else
      return nullptr;
  }

  // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
  // FMFSource propagates fast-math flags when II is an FP operation.
  Instruction *FPI = isa<FPMathOperator>(Val: II) ? II : nullptr;
  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
      RetTy: II->getType(), ID: II->getIntrinsicID(), Args: NewArgs, FMFSource: FPI);
  return Builder.CreateVectorReverse(V: NewIntrinsic);
}
| 1581 | |
/// Fold the following cases and accepts bswap and bitreverse intrinsics:
///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
///
/// \param V presumably the operand of an enclosing bswap/bitreverse call --
///        confirm against callers; the returned binop replaces that call.
/// \returns an unparented replacement binop, or nullptr if no fold applies.
template <Intrinsic::ID IntrID>
static Instruction *foldBitOrderCrossLogicOp(Value *V,
                                             InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
                "This helper only supports BSWAP and BITREVERSE intrinsics" );

  Value *X, *Y;
  // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
  // don't match ConstantExpr that aren't meaningful for this transform.
  if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
      isa<BinaryOperator>(Val: V)) {
    Value *OldReorderX, *OldReorderY;
    BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();

    // If both X and Y are bswap/bitreverse, the transform reduces the number
    // of instructions even if there's multiuse.
    // If only one operand is bswap/bitreverse, we need to ensure the operand
    // have only one use.
    if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
        match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
      return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
    }

    // Only X is a (one-use) reorder: reorder Y instead.
    if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
      return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
    }

    // Only Y is a (one-use) reorder: reorder X instead.
    if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
      Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
      return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
    }
  }
  return nullptr;
}
| 1620 | |
| 1621 | /// Helper to match idempotent binary intrinsics, namely, intrinsics where |
| 1622 | /// `f(f(x, y), y) == f(x, y)` holds. |
| 1623 | static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) { |
| 1624 | switch (IID) { |
| 1625 | case Intrinsic::smax: |
| 1626 | case Intrinsic::smin: |
| 1627 | case Intrinsic::umax: |
| 1628 | case Intrinsic::umin: |
| 1629 | case Intrinsic::maximum: |
| 1630 | case Intrinsic::minimum: |
| 1631 | case Intrinsic::maximumnum: |
| 1632 | case Intrinsic::minimumnum: |
| 1633 | case Intrinsic::maxnum: |
| 1634 | case Intrinsic::minnum: |
| 1635 | return true; |
| 1636 | default: |
| 1637 | return false; |
| 1638 | } |
| 1639 | } |
| 1640 | |
/// Attempt to simplify value-accumulating recurrences of kind:
///   %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
///   %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
/// And let the idempotent binary intrinsic be hoisted, when the operands are
/// known to be loop-invariant.
static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
                                                      IntrinsicInst *II) {
  PHINode *PN;
  Value *Init, *OtherOp;

  // A binary intrinsic recurrence with loop-invariant operands is equivalent to
  // `call @llvm.binary.intrinsic(Init, OtherOp)`.
  // The dominance check ensures OtherOp is available above the phi, where the
  // replacement value will be used.
  auto IID = II->getIntrinsicID();
  if (!isIdempotentBinaryIntrinsic(IID) ||
      !matchSimpleBinaryIntrinsicRecurrence(I: II, P&: PN, Init, OtherOp) ||
      !IC.getDominatorTree().dominates(Def: OtherOp, User: PN))
    return nullptr;

  // Carry over fast-math flags when the hoisted intrinsic is an FP operation.
  auto *InvariantBinaryInst =
      IC.Builder.CreateBinaryIntrinsic(ID: IID, LHS: Init, RHS: OtherOp);
  if (isa<FPMathOperator>(Val: InvariantBinaryInst))
    cast<Instruction>(Val: InvariantBinaryInst)->copyFastMathFlags(I: II);
  return InvariantBinaryInst;
}
| 1665 | |
| 1666 | static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { |
| 1667 | if (!CanReorderLanes) |
| 1668 | return nullptr; |
| 1669 | |
| 1670 | Value *V; |
| 1671 | if (match(V: Arg, P: m_VecReverse(Op0: m_Value(V)))) |
| 1672 | return V; |
| 1673 | |
| 1674 | ArrayRef<int> Mask; |
| 1675 | if (!isa<FixedVectorType>(Val: Arg->getType()) || |
| 1676 | !match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask(Mask))) || |
| 1677 | !cast<ShuffleVectorInst>(Val: Arg)->isSingleSource()) |
| 1678 | return nullptr; |
| 1679 | |
| 1680 | int Sz = Mask.size(); |
| 1681 | SmallBitVector UsedIndices(Sz); |
| 1682 | for (int Idx : Mask) { |
| 1683 | if (Idx == PoisonMaskElem || UsedIndices.test(Idx)) |
| 1684 | return nullptr; |
| 1685 | UsedIndices.set(Idx); |
| 1686 | } |
| 1687 | |
| 1688 | // Can remove shuffle iff just shuffled elements, no repeats, undefs, or |
| 1689 | // other changes. |
| 1690 | return UsedIndices.all() ? V : nullptr; |
| 1691 | } |
| 1692 | |
/// Fold an unsigned minimum of trailing or leading zero bits counts:
///   umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
///   umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
///                                             >> ConstOp))
///   umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
///   umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
                                          const DataLayout &DL,
                                          InstCombiner::BuilderTy &Builder) {
  static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
                "This helper only supports cttz and ctlz intrinsics" );

  // The first umin operand must be a one-use count intrinsic.
  Value *CtOp1, *CtOp2;
  Value *ZeroUndef1, *ZeroUndef2;
  if (!match(I0, m_OneUse(
                     m_Intrinsic<IntrID>(m_Value(V&: CtOp1), m_Value(V&: ZeroUndef1)))))
    return nullptr;

  // Case 1: both operands are counts. OR-ing the inputs yields the minimum
  // count; the is_zero_poison flags are OR'd as well.
  if (match(I1,
            m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: CtOp2), m_Value(V&: ZeroUndef2)))))
    return Builder.CreateBinaryIntrinsic(
        ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: CtOp2),
        RHS: Builder.CreateOr(LHS: ZeroUndef1, RHS: ZeroUndef2));

  // Case 2: the second operand must be a constant strictly below the bit
  // width (checked per element for vectors).
  unsigned BitWidth = I1->getType()->getScalarSizeInBits();
  auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
  if (!match(I1, m_CheckedInt(LessBitWidth)))
    // We have a constant >= BitWidth (which can be handled by CVP)
    // or a non-splat vector with elements < and >= BitWidth
    return nullptr;

  // Clamp the count at ConstOp by OR-ing in a sentinel bit: (1 << ConstOp)
  // for cttz, (SignedMin >> ConstOp) for ctlz. The OR'd input is provably
  // non-zero, so is_zero_poison can safely be set to true.
  Type *Ty = I1->getType();
  Constant *NewConst = ConstantFoldBinaryOpOperands(
      Opcode: IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
      LHS: IntrID == Intrinsic::cttz
          ? ConstantInt::get(Ty, V: 1)
          : ConstantInt::get(Ty, V: APInt::getSignedMinValue(numBits: BitWidth)),
      RHS: cast<Constant>(Val: I1), DL);
  return Builder.CreateBinaryIntrinsic(
      ID: IntrID, LHS: Builder.CreateOr(LHS: CtOp1, RHS: NewConst),
      RHS: ConstantInt::getTrue(Ty: ZeroUndef1->getType()));
}
| 1737 | |
| 1738 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
| 1739 | /// "(X LOp Y) ROp (X LOp Z)". |
| 1740 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, |
| 1741 | bool HasNSW, Intrinsic::ID ROp) { |
| 1742 | switch (ROp) { |
| 1743 | case Intrinsic::umax: |
| 1744 | case Intrinsic::umin: |
| 1745 | if (HasNUW && LOp == Instruction::Add) |
| 1746 | return true; |
| 1747 | if (HasNUW && LOp == Instruction::Shl) |
| 1748 | return true; |
| 1749 | return false; |
| 1750 | case Intrinsic::smax: |
| 1751 | case Intrinsic::smin: |
| 1752 | return HasNSW && LOp == Instruction::Add; |
| 1753 | default: |
| 1754 | return false; |
| 1755 | } |
| 1756 | } |
| 1757 | |
| 1758 | // Attempts to factorise a common term |
| 1759 | // in an instruction that has the form "(A op' B) op (C op' D) |
| 1760 | // where op is an intrinsic and op' is a binop |
| 1761 | static Value * |
| 1762 | foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, |
| 1763 | InstCombiner::BuilderTy &Builder) { |
| 1764 | Value *LHS = II->getOperand(i_nocapture: 0), *RHS = II->getOperand(i_nocapture: 1); |
| 1765 | Intrinsic::ID TopLevelOpcode = II->getIntrinsicID(); |
| 1766 | |
| 1767 | OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(Val: LHS); |
| 1768 | OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(Val: RHS); |
| 1769 | |
| 1770 | if (!Op0 || !Op1) |
| 1771 | return nullptr; |
| 1772 | |
| 1773 | if (Op0->getOpcode() != Op1->getOpcode()) |
| 1774 | return nullptr; |
| 1775 | |
| 1776 | if (!Op0->hasOneUse() || !Op1->hasOneUse()) |
| 1777 | return nullptr; |
| 1778 | |
| 1779 | Instruction::BinaryOps InnerOpcode = |
| 1780 | static_cast<Instruction::BinaryOps>(Op0->getOpcode()); |
| 1781 | bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap(); |
| 1782 | bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap(); |
| 1783 | |
| 1784 | if (!leftDistributesOverRight(LOp: InnerOpcode, HasNUW, HasNSW, ROp: TopLevelOpcode)) |
| 1785 | return nullptr; |
| 1786 | |
| 1787 | Value *A = Op0->getOperand(i_nocapture: 0); |
| 1788 | Value *B = Op0->getOperand(i_nocapture: 1); |
| 1789 | Value *C = Op1->getOperand(i_nocapture: 0); |
| 1790 | Value *D = Op1->getOperand(i_nocapture: 1); |
| 1791 | |
| 1792 | // Attempts to swap variables such that A equals C or B equals D, |
| 1793 | // if the inner operation is commutative. |
| 1794 | if (Op0->isCommutative() && A != C && B != D) { |
| 1795 | if (A == D || B == C) |
| 1796 | std::swap(a&: C, b&: D); |
| 1797 | else |
| 1798 | return nullptr; |
| 1799 | } |
| 1800 | |
| 1801 | BinaryOperator *NewBinop; |
| 1802 | if (A == C) { |
| 1803 | Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: B, RHS: D); |
| 1804 | NewBinop = |
| 1805 | cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: NewIntrinsic)); |
| 1806 | } else if (B == D) { |
| 1807 | Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(ID: TopLevelOpcode, LHS: A, RHS: C); |
| 1808 | NewBinop = |
| 1809 | cast<BinaryOperator>(Val: Builder.CreateBinOp(Opc: InnerOpcode, LHS: NewIntrinsic, RHS: B)); |
| 1810 | } else { |
| 1811 | return nullptr; |
| 1812 | } |
| 1813 | |
| 1814 | NewBinop->setHasNoUnsignedWrap(HasNUW); |
| 1815 | NewBinop->setHasNoSignedWrap(HasNSW); |
| 1816 | |
| 1817 | return NewBinop; |
| 1818 | } |
| 1819 | |
/// Fold a NEON vector-shift intrinsic with a constant shift amount into plain
/// shift IR: all amounts non-negative (each < element width) --> shl; all
/// amounts negative (each > -element width) --> lshr/ashr by the negated
/// amount (ashr for the signed-shift intrinsics). Mixed signs or
/// out-of-range amounts bail out.
static Instruction *foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC) {
  Value *Arg0 = II->getArgOperand(i: 0);
  auto *ShiftConst = dyn_cast<Constant>(Val: II->getArgOperand(i: 1));
  if (!ShiftConst)
    return nullptr;

  int ElemBits = Arg0->getType()->getScalarSizeInBits();
  bool AllPositive = true;
  bool AllNegative = true;

  // Returns true while the elements seen so far share one sign and are in
  // range for that direction; clears AllPositive/AllNegative as it goes.
  auto Check = [&](Constant *C) -> bool {
    if (auto *CI = dyn_cast_or_null<ConstantInt>(Val: C)) {
      const APInt &V = CI->getValue();
      if (V.isNonNegative()) {
        AllNegative = false;
        return AllPositive && V.ult(RHS: ElemBits);
      }
      AllPositive = false;
      return AllNegative && V.sgt(RHS: -ElemBits);
    }
    // Not a ConstantInt element (null or non-integer) -- give up.
    return false;
  };

  if (auto *VTy = dyn_cast<FixedVectorType>(Val: Arg0->getType())) {
    // Vector shift amount: every element must pass the sign/range check.
    for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
      if (!Check(ShiftConst->getAggregateElement(Elt: I)))
        return nullptr;
    }

  } else if (!Check(ShiftConst))
    return nullptr;

  IRBuilderBase &B = IC.Builder;
  if (AllPositive)
    return IC.replaceInstUsesWith(I&: *II, V: B.CreateShl(LHS: Arg0, RHS: ShiftConst));

  // All amounts are negative: shift right by the magnitude instead.
  Value *NegAmt = B.CreateNeg(V: ShiftConst);
  Intrinsic::ID IID = II->getIntrinsicID();
  const bool IsSigned =
      IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
  Value *Result =
      IsSigned ? B.CreateAShr(LHS: Arg0, RHS: NegAmt) : B.CreateLShr(LHS: Arg0, RHS: NegAmt);
  return IC.replaceInstUsesWith(I&: *II, V: Result);
}
| 1864 | |
| 1865 | /// CallInst simplification. This mostly only handles folding of intrinsic |
| 1866 | /// instructions. For normal calls, it allows visitCallBase to do the heavy |
| 1867 | /// lifting. |
| 1868 | Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { |
| 1869 | // Don't try to simplify calls without uses. It will not do anything useful, |
| 1870 | // but will result in the following folds being skipped. |
| 1871 | if (!CI.use_empty()) { |
| 1872 | SmallVector<Value *, 8> Args(CI.args()); |
| 1873 | if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args, |
| 1874 | Q: SQ.getWithInstruction(I: &CI))) |
| 1875 | return replaceInstUsesWith(I&: CI, V); |
| 1876 | } |
| 1877 | |
| 1878 | if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI)) |
| 1879 | return visitFree(FI&: CI, FreedOp); |
| 1880 | |
| 1881 | // If the caller function (i.e. us, the function that contains this CallInst) |
| 1882 | // is nounwind, mark the call as nounwind, even if the callee isn't. |
| 1883 | if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { |
| 1884 | CI.setDoesNotThrow(); |
| 1885 | return &CI; |
| 1886 | } |
| 1887 | |
| 1888 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI); |
| 1889 | if (!II) |
| 1890 | return visitCallBase(Call&: CI); |
| 1891 | |
| 1892 | // Intrinsics cannot occur in an invoke or a callbr, so handle them here |
| 1893 | // instead of in visitCallBase. |
| 1894 | if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) { |
| 1895 | if (auto NumBytes = MI->getLengthInBytes()) { |
| 1896 | // memmove/cpy/set of zero bytes is a noop. |
| 1897 | if (NumBytes->isZero()) |
| 1898 | return eraseInstFromFunction(I&: CI); |
| 1899 | |
| 1900 | // For atomic unordered mem intrinsics if len is not a positive or |
| 1901 | // not a multiple of element size then behavior is undefined. |
| 1902 | if (MI->isAtomic() && |
| 1903 | (NumBytes->isNegative() || |
| 1904 | (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) { |
| 1905 | CreateNonTerminatorUnreachable(InsertAt: MI); |
| 1906 | assert(MI->getType()->isVoidTy() && |
| 1907 | "non void atomic unordered mem intrinsic" ); |
| 1908 | return eraseInstFromFunction(I&: *MI); |
| 1909 | } |
| 1910 | } |
| 1911 | |
| 1912 | // No other transformations apply to volatile transfers. |
| 1913 | if (MI->isVolatile()) |
| 1914 | return nullptr; |
| 1915 | |
| 1916 | if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
| 1917 | // memmove(x,x,size) -> noop. |
| 1918 | if (MTI->getSource() == MTI->getDest()) |
| 1919 | return eraseInstFromFunction(I&: CI); |
| 1920 | } |
| 1921 | |
| 1922 | auto IsPointerUndefined = [MI](Value *Ptr) { |
| 1923 | return isa<ConstantPointerNull>(Val: Ptr) && |
| 1924 | !NullPointerIsDefined( |
| 1925 | F: MI->getFunction(), |
| 1926 | AS: cast<PointerType>(Val: Ptr->getType())->getAddressSpace()); |
| 1927 | }; |
| 1928 | bool SrcIsUndefined = false; |
| 1929 | // If we can determine a pointer alignment that is bigger than currently |
| 1930 | // set, update the alignment. |
| 1931 | if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) { |
| 1932 | if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI)) |
| 1933 | return I; |
| 1934 | SrcIsUndefined = IsPointerUndefined(MTI->getRawSource()); |
| 1935 | } else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) { |
| 1936 | if (Instruction *I = SimplifyAnyMemSet(MI: MSI)) |
| 1937 | return I; |
| 1938 | } |
| 1939 | |
| 1940 | // If src/dest is null, this memory intrinsic must be a noop. |
| 1941 | if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) { |
| 1942 | Builder.CreateAssumption(Cond: Builder.CreateIsNull(Arg: MI->getLength())); |
| 1943 | return eraseInstFromFunction(I&: CI); |
| 1944 | } |
| 1945 | |
| 1946 | // If we have a memmove and the source operation is a constant global, |
| 1947 | // then the source and dest pointers can't alias, so we can change this |
| 1948 | // into a call to memcpy. |
| 1949 | if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) { |
| 1950 | if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource())) |
| 1951 | if (GVSrc->isConstant()) { |
| 1952 | Module *M = CI.getModule(); |
| 1953 | Intrinsic::ID MemCpyID = |
| 1954 | MMI->isAtomic() |
| 1955 | ? Intrinsic::memcpy_element_unordered_atomic |
| 1956 | : Intrinsic::memcpy; |
| 1957 | Type *Tys[3] = { CI.getArgOperand(i: 0)->getType(), |
| 1958 | CI.getArgOperand(i: 1)->getType(), |
| 1959 | CI.getArgOperand(i: 2)->getType() }; |
| 1960 | CI.setCalledFunction( |
| 1961 | Intrinsic::getOrInsertDeclaration(M, id: MemCpyID, Tys)); |
| 1962 | return II; |
| 1963 | } |
| 1964 | } |
| 1965 | } |
| 1966 | |
| 1967 | // For fixed width vector result intrinsics, use the generic demanded vector |
| 1968 | // support. |
| 1969 | if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) { |
| 1970 | auto VWidth = IIFVTy->getNumElements(); |
| 1971 | APInt PoisonElts(VWidth, 0); |
| 1972 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
| 1973 | if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) { |
| 1974 | if (V != II) |
| 1975 | return replaceInstUsesWith(I&: *II, V); |
| 1976 | return II; |
| 1977 | } |
| 1978 | } |
| 1979 | |
| 1980 | if (II->isCommutative()) { |
| 1981 | if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: 0), RHS: II->getOperand(i_nocapture: 1))) { |
| 1982 | replaceOperand(I&: *II, OpNum: 0, V: Pair->first); |
| 1983 | replaceOperand(I&: *II, OpNum: 1, V: Pair->second); |
| 1984 | return II; |
| 1985 | } |
| 1986 | |
| 1987 | if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI)) |
| 1988 | return NewCall; |
| 1989 | } |
| 1990 | |
| 1991 | // Unused constrained FP intrinsic calls may have declared side effect, which |
| 1992 | // prevents it from being removed. In some cases however the side effect is |
| 1993 | // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it |
| 1994 | // returns a replacement, the call may be removed. |
| 1995 | if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) { |
| 1996 | if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI))) |
| 1997 | return eraseInstFromFunction(I&: CI); |
| 1998 | } |
| 1999 | |
| 2000 | Intrinsic::ID IID = II->getIntrinsicID(); |
| 2001 | switch (IID) { |
| 2002 | case Intrinsic::objectsize: { |
| 2003 | SmallVector<Instruction *> InsertedInstructions; |
| 2004 | if (Value *V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/false, |
| 2005 | InsertedInstructions: &InsertedInstructions)) { |
| 2006 | for (Instruction *Inserted : InsertedInstructions) |
| 2007 | Worklist.add(I: Inserted); |
| 2008 | return replaceInstUsesWith(I&: CI, V); |
| 2009 | } |
| 2010 | return nullptr; |
| 2011 | } |
| 2012 | case Intrinsic::abs: { |
| 2013 | Value *IIOperand = II->getArgOperand(i: 0); |
| 2014 | bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: 1))->isOneValue(); |
| 2015 | |
| 2016 | // abs(-x) -> abs(x) |
| 2017 | Value *X; |
| 2018 | if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X)))) { |
| 2019 | if (cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() || IntMinIsPoison) |
| 2020 | replaceOperand(I&: *II, OpNum: 1, V: Builder.getTrue()); |
| 2021 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2022 | } |
| 2023 | if (match(V: IIOperand, P: m_c_Select(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X)))) |
| 2024 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2025 | |
| 2026 | Value *Y; |
| 2027 | // abs(a * abs(b)) -> abs(a * b) |
| 2028 | if (match(V: IIOperand, |
| 2029 | P: m_OneUse(SubPattern: m_c_Mul(L: m_Value(V&: X), |
| 2030 | R: m_Intrinsic<Intrinsic::abs>(Op0: m_Value(V&: Y)))))) { |
| 2031 | bool NSW = |
| 2032 | cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison; |
| 2033 | auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y); |
| 2034 | return replaceOperand(I&: *II, OpNum: 0, V: XY); |
| 2035 | } |
| 2036 | |
| 2037 | if (std::optional<bool> Known = |
| 2038 | getKnownSignOrZero(Op: IIOperand, SQ: SQ.getWithInstruction(I: II))) { |
| 2039 | // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y) |
| 2040 | // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y) |
| 2041 | if (!*Known) |
| 2042 | return replaceInstUsesWith(I&: *II, V: IIOperand); |
| 2043 | |
| 2044 | // abs(x) -> -x if x < 0 |
| 2045 | // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y) |
| 2046 | if (IntMinIsPoison) |
| 2047 | return BinaryOperator::CreateNSWNeg(Op: IIOperand); |
| 2048 | return BinaryOperator::CreateNeg(Op: IIOperand); |
| 2049 | } |
| 2050 | |
| 2051 | // abs (sext X) --> zext (abs X*) |
| 2052 | // Clear the IsIntMin (nsw) bit on the abs to allow narrowing. |
| 2053 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) { |
| 2054 | Value *NarrowAbs = |
| 2055 | Builder.CreateBinaryIntrinsic(ID: Intrinsic::abs, LHS: X, RHS: Builder.getFalse()); |
| 2056 | return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType()); |
| 2057 | } |
| 2058 | |
| 2059 | // Match a complicated way to check if a number is odd/even: |
| 2060 | // abs (srem X, 2) --> and X, 1 |
| 2061 | const APInt *C; |
| 2062 | if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == 2) |
| 2063 | return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: 1)); |
| 2064 | |
| 2065 | break; |
| 2066 | } |
| 2067 | case Intrinsic::umin: { |
| 2068 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 2069 | // umin(x, 1) == zext(x != 0) |
| 2070 | if (match(V: I1, P: m_One())) { |
| 2071 | assert(II->getType()->getScalarSizeInBits() != 1 && |
| 2072 | "Expected simplify of umin with max constant" ); |
| 2073 | Value *Zero = Constant::getNullValue(Ty: I0->getType()); |
| 2074 | Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero); |
| 2075 | return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType()); |
| 2076 | } |
| 2077 | // umin(cttz(x), const) --> cttz(x | (1 << const)) |
| 2078 | if (Value *FoldedCttz = |
| 2079 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>( |
| 2080 | I0, I1, DL, Builder)) |
| 2081 | return replaceInstUsesWith(I&: *II, V: FoldedCttz); |
| 2082 | // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const)) |
| 2083 | if (Value *FoldedCtlz = |
| 2084 | foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>( |
| 2085 | I0, I1, DL, Builder)) |
| 2086 | return replaceInstUsesWith(I&: *II, V: FoldedCtlz); |
| 2087 | [[fallthrough]]; |
| 2088 | } |
| 2089 | case Intrinsic::umax: { |
| 2090 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 2091 | Value *X, *Y; |
| 2092 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) && |
| 2093 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
| 2094 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
| 2095 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
| 2096 | } |
| 2097 | Constant *C; |
| 2098 | if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
| 2099 | I0->hasOneUse()) { |
| 2100 | if (Constant *NarrowC = getLosslessUnsignedTrunc(C, DestTy: X->getType(), DL)) { |
| 2101 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
| 2102 | return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType()); |
| 2103 | } |
| 2104 | } |
| 2105 | // If C is not 0: |
| 2106 | // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C) |
| 2107 | // If C is not 0 or 1: |
| 2108 | // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C) |
| 2109 | auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * { |
| 2110 | const APInt *C; |
| 2111 | Value *X; |
| 2112 | if (!match(V: A, P: m_NUWShl(L: m_Value(V&: X), R: m_APInt(Res&: C))) && |
| 2113 | !(match(V: A, P: m_NUWMul(L: m_Value(V&: X), R: m_APInt(Res&: C))) && !C->isOne())) |
| 2114 | return nullptr; |
| 2115 | if (C->isZero()) |
| 2116 | return nullptr; |
| 2117 | if (!match(V: B, P: m_OneUse(SubPattern: m_Add(L: m_Specific(V: X), R: m_One())))) |
| 2118 | return nullptr; |
| 2119 | |
| 2120 | Value *Cmp = Builder.CreateICmpEQ(LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: 0)); |
| 2121 | Value *NewSelect = nullptr; |
| 2122 | NewSelect = Builder.CreateSelectWithUnknownProfile( |
| 2123 | C: Cmp, True: ConstantInt::get(Ty: X->getType(), V: 1), False: A, DEBUG_TYPE); |
| 2124 | return replaceInstUsesWith(I&: *II, V: NewSelect); |
| 2125 | }; |
| 2126 | |
| 2127 | if (IID == Intrinsic::umax) { |
| 2128 | if (Instruction *I = foldMaxMulShift(I0, I1)) |
| 2129 | return I; |
| 2130 | if (Instruction *I = foldMaxMulShift(I1, I0)) |
| 2131 | return I; |
| 2132 | } |
| 2133 | |
| 2134 | // If both operands of unsigned min/max are sign-extended, it is still ok |
| 2135 | // to narrow the operation. |
| 2136 | [[fallthrough]]; |
| 2137 | } |
| 2138 | case Intrinsic::smax: |
| 2139 | case Intrinsic::smin: { |
| 2140 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 2141 | Value *X, *Y; |
| 2142 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) && |
| 2143 | (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { |
| 2144 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y); |
| 2145 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
| 2146 | } |
| 2147 | |
| 2148 | Constant *C; |
| 2149 | if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) && |
| 2150 | I0->hasOneUse()) { |
| 2151 | if (Constant *NarrowC = getLosslessSignedTrunc(C, DestTy: X->getType(), DL)) { |
| 2152 | Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC); |
| 2153 | return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType()); |
| 2154 | } |
| 2155 | } |
| 2156 | |
| 2157 | // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC |
| 2158 | // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC |
| 2159 | const APInt *MinC, *MaxC; |
| 2160 | auto CreateCanonicalClampForm = [&](bool IsSigned) { |
| 2161 | auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax; |
| 2162 | auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin; |
| 2163 | Value *NewMax = Builder.CreateBinaryIntrinsic( |
| 2164 | ID: MaxIID, LHS: X, RHS: ConstantInt::get(Ty: X->getType(), V: *MaxC)); |
| 2165 | return replaceInstUsesWith( |
| 2166 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 2167 | ID: MinIID, LHS: NewMax, RHS: ConstantInt::get(Ty: X->getType(), V: *MinC))); |
| 2168 | }; |
| 2169 | if (IID == Intrinsic::smax && |
| 2170 | match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::smin>(Op0: m_Value(V&: X), |
| 2171 | Op1: m_APInt(Res&: MinC)))) && |
| 2172 | match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->sgt(RHS: *MaxC)) |
| 2173 | return CreateCanonicalClampForm(true); |
| 2174 | if (IID == Intrinsic::umax && |
| 2175 | match(V: I0, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::umin>(Op0: m_Value(V&: X), |
| 2176 | Op1: m_APInt(Res&: MinC)))) && |
| 2177 | match(V: I1, P: m_APInt(Res&: MaxC)) && MinC->ugt(RHS: *MaxC)) |
| 2178 | return CreateCanonicalClampForm(false); |
| 2179 | |
| 2180 | // umin(i1 X, i1 Y) -> and i1 X, Y |
| 2181 | // smax(i1 X, i1 Y) -> and i1 X, Y |
| 2182 | if ((IID == Intrinsic::umin || IID == Intrinsic::smax) && |
| 2183 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 2184 | return BinaryOperator::CreateAnd(V1: I0, V2: I1); |
| 2185 | } |
| 2186 | |
| 2187 | // umax(i1 X, i1 Y) -> or i1 X, Y |
| 2188 | // smin(i1 X, i1 Y) -> or i1 X, Y |
| 2189 | if ((IID == Intrinsic::umax || IID == Intrinsic::smin) && |
| 2190 | II->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 2191 | return BinaryOperator::CreateOr(V1: I0, V2: I1); |
| 2192 | } |
| 2193 | |
| 2194 | // smin(smax(X, -1), 1) -> scmp(X, 0) |
| 2195 | // smax(smin(X, 1), -1) -> scmp(X, 0) |
| 2196 | // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1) |
| 2197 | // And i1's have been changed to and/ors |
| 2198 | // So we only need to check for smin |
| 2199 | if (IID == Intrinsic::smin) { |
| 2200 | if (match(V: I0, P: m_OneUse(SubPattern: m_SMax(L: m_Value(V&: X), R: m_AllOnes()))) && |
| 2201 | match(V: I1, P: m_One())) { |
| 2202 | Value *Zero = ConstantInt::get(Ty: X->getType(), V: 0); |
| 2203 | return replaceInstUsesWith( |
| 2204 | I&: CI, |
| 2205 | V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {X, Zero})); |
| 2206 | } |
| 2207 | } |
| 2208 | |
| 2209 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
| 2210 | // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y) |
| 2211 | // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y) |
| 2212 | // TODO: Canonicalize neg after min/max if I1 is constant. |
| 2213 | if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) && |
| 2214 | (I0->hasOneUse() || I1->hasOneUse())) { |
| 2215 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
| 2216 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y); |
| 2217 | return BinaryOperator::CreateNSWNeg(Op: InvMaxMin); |
| 2218 | } |
| 2219 | } |
| 2220 | |
| 2221 | // (umax X, (xor X, Pow2)) |
| 2222 | // -> (or X, Pow2) |
| 2223 | // (umin X, (xor X, Pow2)) |
| 2224 | // -> (and X, ~Pow2) |
| 2225 | // (smax X, (xor X, Pos_Pow2)) |
| 2226 | // -> (or X, Pos_Pow2) |
| 2227 | // (smin X, (xor X, Pos_Pow2)) |
| 2228 | // -> (and X, ~Pos_Pow2) |
| 2229 | // (smax X, (xor X, Neg_Pow2)) |
| 2230 | // -> (and X, ~Neg_Pow2) |
| 2231 | // (smin X, (xor X, Neg_Pow2)) |
| 2232 | // -> (or X, Neg_Pow2) |
| 2233 | if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) || |
| 2234 | match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) && |
| 2235 | isKnownToBeAPowerOfTwo(V: X, /* OrZero */ true)) { |
| 2236 | bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax; |
| 2237 | bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin; |
| 2238 | |
| 2239 | if (IID == Intrinsic::smax || IID == Intrinsic::smin) { |
| 2240 | auto KnownSign = getKnownSign(Op: X, SQ: SQ.getWithInstruction(I: II)); |
| 2241 | if (KnownSign == std::nullopt) { |
| 2242 | UseOr = false; |
| 2243 | UseAndN = false; |
| 2244 | } else if (*KnownSign /* true is Signed. */) { |
| 2245 | UseOr ^= true; |
| 2246 | UseAndN ^= true; |
| 2247 | Type *Ty = I0->getType(); |
| 2248 | // Negative power of 2 must be IntMin. It's possible to be able to |
| 2249 | // prove negative / power of 2 without actually having known bits, so |
| 2250 | // just get the value by hand. |
| 2251 | X = Constant::getIntegerValue( |
| 2252 | Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits())); |
| 2253 | } |
| 2254 | } |
| 2255 | if (UseOr) |
| 2256 | return BinaryOperator::CreateOr(V1: I0, V2: X); |
| 2257 | else if (UseAndN) |
| 2258 | return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X)); |
| 2259 | } |
| 2260 | |
| 2261 | // If we can eliminate ~A and Y is free to invert: |
| 2262 | // max ~A, Y --> ~(min A, ~Y) |
| 2263 | // |
| 2264 | // Examples: |
| 2265 | // max ~A, ~Y --> ~(min A, Y) |
| 2266 | // max ~A, C --> ~(min A, ~C) |
| 2267 | // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z)) |
| 2268 | auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { |
| 2269 | Value *A; |
| 2270 | if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) && |
| 2271 | !isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) { |
| 2272 | if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) { |
| 2273 | Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID); |
| 2274 | Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY); |
| 2275 | return BinaryOperator::CreateNot(Op: InvMaxMin); |
| 2276 | } |
| 2277 | } |
| 2278 | return nullptr; |
| 2279 | }; |
| 2280 | |
| 2281 | if (Instruction *I = moveNotAfterMinMax(I0, I1)) |
| 2282 | return I; |
| 2283 | if (Instruction *I = moveNotAfterMinMax(I1, I0)) |
| 2284 | return I; |
| 2285 | |
| 2286 | if (Instruction *I = moveAddAfterMinMax(II, Builder)) |
| 2287 | return I; |
| 2288 | |
| 2289 | // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C |
| 2290 | const APInt *RHSC; |
| 2291 | if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) && |
| 2292 | match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC))))) |
| 2293 | return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y), |
| 2294 | V2: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
| 2295 | |
| 2296 | // smax(X, -X) --> abs(X) |
| 2297 | // smin(X, -X) --> -abs(X) |
| 2298 | // umax(X, -X) --> -abs(X) |
| 2299 | // umin(X, -X) --> abs(X) |
| 2300 | if (isKnownNegation(X: I0, Y: I1)) { |
| 2301 | // We can choose either operand as the input to abs(), but if we can |
| 2302 | // eliminate the only use of a value, that's better for subsequent |
| 2303 | // transforms/analysis. |
| 2304 | if (I0->hasOneUse() && !I1->hasOneUse()) |
| 2305 | std::swap(a&: I0, b&: I1); |
| 2306 | |
| 2307 | // This is some variant of abs(). See if we can propagate 'nsw' to the abs |
| 2308 | // operation and potentially its negation. |
| 2309 | bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, /* NeedNSW */ true); |
| 2310 | Value *Abs = Builder.CreateBinaryIntrinsic( |
| 2311 | ID: Intrinsic::abs, LHS: I0, |
| 2312 | RHS: ConstantInt::getBool(Context&: II->getContext(), V: IntMinIsPoison)); |
| 2313 | |
| 2314 | // We don't have a "nabs" intrinsic, so negate if needed based on the |
| 2315 | // max/min operation. |
| 2316 | if (IID == Intrinsic::smin || IID == Intrinsic::umax) |
| 2317 | Abs = Builder.CreateNeg(V: Abs, Name: "nabs" , HasNSW: IntMinIsPoison); |
| 2318 | return replaceInstUsesWith(I&: CI, V: Abs); |
| 2319 | } |
| 2320 | |
| 2321 | if (Instruction *Sel = foldClampRangeOfTwo(II, Builder)) |
| 2322 | return Sel; |
| 2323 | |
| 2324 | if (Instruction *SAdd = matchSAddSubSat(MinMax1&: *II)) |
| 2325 | return SAdd; |
| 2326 | |
| 2327 | if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ)) |
| 2328 | return replaceInstUsesWith(I&: *II, V: NewMinMax); |
| 2329 | |
| 2330 | if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder)) |
| 2331 | return R; |
| 2332 | |
| 2333 | if (Instruction *NewMinMax = factorizeMinMaxTree(II)) |
| 2334 | return NewMinMax; |
| 2335 | |
| 2336 | // Try to fold minmax with constant RHS based on range information |
| 2337 | if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) { |
| 2338 | ICmpInst::Predicate Pred = |
| 2339 | ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID)); |
| 2340 | bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID); |
| 2341 | ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits( |
| 2342 | V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II)); |
| 2343 | if (!LHS_CR.isFullSet()) { |
| 2344 | if (LHS_CR.icmp(Pred, Other: *RHSC)) |
| 2345 | return replaceInstUsesWith(I&: *II, V: I0); |
| 2346 | if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC)) |
| 2347 | return replaceInstUsesWith(I&: *II, |
| 2348 | V: ConstantInt::get(Ty: II->getType(), V: *RHSC)); |
| 2349 | } |
| 2350 | } |
| 2351 | |
| 2352 | if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder)) |
| 2353 | return replaceInstUsesWith(I&: *II, V); |
| 2354 | |
| 2355 | break; |
| 2356 | } |
| 2357 | case Intrinsic::scmp: { |
| 2358 | Value *I0 = II->getArgOperand(i: 0), *I1 = II->getArgOperand(i: 1); |
| 2359 | Value *LHS, *RHS; |
| 2360 | if (match(V: I0, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS))) && match(V: I1, P: m_Zero())) |
| 2361 | return replaceInstUsesWith( |
| 2362 | I&: CI, |
| 2363 | V: Builder.CreateIntrinsic(RetTy: II->getType(), ID: Intrinsic::scmp, Args: {LHS, RHS})); |
| 2364 | break; |
| 2365 | } |
| 2366 | case Intrinsic::bitreverse: { |
| 2367 | Value *IIOperand = II->getArgOperand(i: 0); |
| 2368 | // bitrev (zext i1 X to ?) --> X ? SignBitC : 0 |
| 2369 | Value *X; |
| 2370 | if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) && |
| 2371 | X->getType()->isIntOrIntVectorTy(BitWidth: 1)) { |
| 2372 | Type *Ty = II->getType(); |
| 2373 | APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits()); |
| 2374 | return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit), |
| 2375 | S2: ConstantInt::getNullValue(Ty)); |
| 2376 | } |
| 2377 | |
| 2378 | if (Instruction *crossLogicOpFold = |
| 2379 | foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(V: IIOperand, Builder)) |
| 2380 | return crossLogicOpFold; |
| 2381 | |
| 2382 | break; |
| 2383 | } |
| 2384 | case Intrinsic::bswap: { |
| 2385 | Value *IIOperand = II->getArgOperand(i: 0); |
| 2386 | |
| 2387 | // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as |
| 2388 | // inverse-shift-of-bswap: |
| 2389 | // bswap (shl X, Y) --> lshr (bswap X), Y |
| 2390 | // bswap (lshr X, Y) --> shl (bswap X), Y |
| 2391 | Value *X, *Y; |
| 2392 | if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) { |
| 2393 | unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits(); |
| 2394 | if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: 3))) { |
| 2395 | Value *NewSwap = Builder.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: X); |
| 2396 | BinaryOperator::BinaryOps InverseShift = |
| 2397 | cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl |
| 2398 | ? Instruction::LShr |
| 2399 | : Instruction::Shl; |
| 2400 | return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y); |
| 2401 | } |
| 2402 | } |
| 2403 | |
| 2404 | KnownBits Known = computeKnownBits(V: IIOperand, CxtI: II); |
| 2405 | uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: 8); |
| 2406 | uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: 8); |
| 2407 | unsigned BW = Known.getBitWidth(); |
| 2408 | |
| 2409 | // bswap(x) -> shift(x) if x has exactly one "active byte" |
| 2410 | if (BW - LZ - TZ == 8) { |
| 2411 | assert(LZ != TZ && "active byte cannot be in the middle" ); |
| 2412 | if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x |
| 2413 | return BinaryOperator::CreateNUWShl( |
| 2414 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ)); |
| 2415 | // -> lshr(x) if the "active byte" is in the high part of x |
| 2416 | return BinaryOperator::CreateExactLShr( |
| 2417 | V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ)); |
| 2418 | } |
| 2419 | |
| 2420 | // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) |
| 2421 | if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) { |
| 2422 | unsigned C = X->getType()->getScalarSizeInBits() - BW; |
| 2423 | Value *CV = ConstantInt::get(Ty: X->getType(), V: C); |
| 2424 | Value *V = Builder.CreateLShr(LHS: X, RHS: CV); |
| 2425 | return new TruncInst(V, IIOperand->getType()); |
| 2426 | } |
| 2427 | |
| 2428 | if (Instruction *crossLogicOpFold = |
| 2429 | foldBitOrderCrossLogicOp<Intrinsic::bswap>(V: IIOperand, Builder)) { |
| 2430 | return crossLogicOpFold; |
| 2431 | } |
| 2432 | |
| 2433 | // Try to fold into bitreverse if bswap is the root of the expression tree. |
| 2434 | if (Instruction *BitOp = matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ false, |
| 2435 | /*MatchBitReversals*/ true)) |
| 2436 | return BitOp; |
| 2437 | break; |
| 2438 | } |
| 2439 | case Intrinsic::masked_load: |
| 2440 | if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(II&: *II)) |
| 2441 | return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp); |
| 2442 | break; |
| 2443 | case Intrinsic::masked_store: |
| 2444 | return simplifyMaskedStore(II&: *II); |
| 2445 | case Intrinsic::masked_gather: |
| 2446 | return simplifyMaskedGather(II&: *II); |
| 2447 | case Intrinsic::masked_scatter: |
| 2448 | return simplifyMaskedScatter(II&: *II); |
| 2449 | case Intrinsic::launder_invariant_group: |
| 2450 | case Intrinsic::strip_invariant_group: |
| 2451 | if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: *II, IC&: *this)) |
| 2452 | return replaceInstUsesWith(I&: *II, V: SkippedBarrier); |
| 2453 | break; |
| 2454 | case Intrinsic::powi: |
| 2455 | if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 1))) { |
| 2456 | // 0 and 1 are handled in instsimplify |
| 2457 | // powi(x, -1) -> 1/x |
| 2458 | if (Power->isMinusOne()) |
| 2459 | return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: 1.0), |
| 2460 | V2: II->getArgOperand(i: 0), FMFSource: II); |
| 2461 | // powi(x, 2) -> x*x |
| 2462 | if (Power->equalsInt(V: 2)) |
| 2463 | return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: 0), |
| 2464 | V2: II->getArgOperand(i: 0), FMFSource: II); |
| 2465 | |
| 2466 | if (!Power->getValue()[0]) { |
| 2467 | Value *X; |
| 2468 | // If power is even: |
| 2469 | // powi(-x, p) -> powi(x, p) |
| 2470 | // powi(fabs(x), p) -> powi(x, p) |
| 2471 | // powi(copysign(x, y), p) -> powi(x, p) |
| 2472 | if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: X))) || |
| 2473 | match(V: II->getArgOperand(i: 0), P: m_FAbs(Op0: m_Value(V&: X))) || |
| 2474 | match(V: II->getArgOperand(i: 0), |
| 2475 | P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(V&: X), Op1: m_Value()))) |
| 2476 | return replaceOperand(I&: *II, OpNum: 0, V: X); |
| 2477 | } |
| 2478 | } |
| 2479 | break; |
| 2480 | |
| 2481 | case Intrinsic::cttz: |
| 2482 | case Intrinsic::ctlz: |
| 2483 | if (auto *I = foldCttzCtlz(II&: *II, IC&: *this)) |
| 2484 | return I; |
| 2485 | break; |
| 2486 | |
| 2487 | case Intrinsic::ctpop: |
| 2488 | if (auto *I = foldCtpop(II&: *II, IC&: *this)) |
| 2489 | return I; |
| 2490 | break; |
| 2491 | |
| 2492 | case Intrinsic::fshl: |
| 2493 | case Intrinsic::fshr: { |
| 2494 | Value *Op0 = II->getArgOperand(i: 0), *Op1 = II->getArgOperand(i: 1); |
| 2495 | Type *Ty = II->getType(); |
| 2496 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 2497 | Constant *ShAmtC; |
| 2498 | if (match(V: II->getArgOperand(i: 2), P: m_ImmConstant(C&: ShAmtC))) { |
| 2499 | // Canonicalize a shift amount constant operand to modulo the bit-width. |
| 2500 | Constant *WidthC = ConstantInt::get(Ty, V: BitWidth); |
| 2501 | Constant *ModuloC = |
| 2502 | ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL); |
| 2503 | if (!ModuloC) |
| 2504 | return nullptr; |
| 2505 | if (ModuloC != ShAmtC) |
| 2506 | return replaceOperand(I&: *II, OpNum: 2, V: ModuloC); |
| 2507 | |
| 2508 | assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC, |
| 2509 | ShAmtC, DL), |
| 2510 | m_One()) && |
| 2511 | "Shift amount expected to be modulo bitwidth" ); |
| 2512 | |
| 2513 | // Canonicalize funnel shift right by constant to funnel shift left. This |
| 2514 | // is not entirely arbitrary. For historical reasons, the backend may |
| 2515 | // recognize rotate left patterns but miss rotate right patterns. |
| 2516 | if (IID == Intrinsic::fshr) { |
| 2517 | // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero. |
| 2518 | if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II))) |
| 2519 | return nullptr; |
| 2520 | |
| 2521 | Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC); |
| 2522 | Module *Mod = II->getModule(); |
| 2523 | Function *Fshl = |
| 2524 | Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::fshl, Tys: Ty); |
| 2525 | return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC }); |
| 2526 | } |
| 2527 | assert(IID == Intrinsic::fshl && |
| 2528 | "All funnel shifts by simple constants should go left" ); |
| 2529 | |
| 2530 | // fshl(X, 0, C) --> shl X, C |
| 2531 | // fshl(X, undef, C) --> shl X, C |
| 2532 | if (match(V: Op1, P: m_ZeroInt()) || match(V: Op1, P: m_Undef())) |
| 2533 | return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC); |
| 2534 | |
| 2535 | // fshl(0, X, C) --> lshr X, (BW-C) |
| 2536 | // fshl(undef, X, C) --> lshr X, (BW-C) |
| 2537 | if (match(V: Op0, P: m_ZeroInt()) || match(V: Op0, P: m_Undef())) |
| 2538 | return BinaryOperator::CreateLShr(V1: Op1, |
| 2539 | V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC)); |
| 2540 | |
| 2541 | // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form) |
| 2542 | if (Op0 == Op1 && BitWidth == 16 && match(V: ShAmtC, P: m_SpecificInt(V: 8))) { |
| 2543 | Module *Mod = II->getModule(); |
| 2544 | Function *Bswap = |
| 2545 | Intrinsic::getOrInsertDeclaration(M: Mod, id: Intrinsic::bswap, Tys: Ty); |
| 2546 | return CallInst::Create(Func: Bswap, Args: { Op0 }); |
| 2547 | } |
| 2548 | if (Instruction *BitOp = |
| 2549 | matchBSwapOrBitReverse(I&: *II, /*MatchBSwaps*/ true, |
| 2550 | /*MatchBitReversals*/ true)) |
| 2551 | return BitOp; |
| 2552 | |
| 2553 | // R = fshl(X, X, C2) |
| 2554 | // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize) |
| 2555 | Value *InnerOp; |
| 2556 | const APInt *ShAmtInnerC, *ShAmtOuterC; |
| 2557 | if (match(V: Op0, P: m_FShl(Op0: m_Value(V&: InnerOp), Op1: m_Deferred(V: InnerOp), |
| 2558 | Op2: m_APInt(Res&: ShAmtInnerC))) && |
| 2559 | match(V: ShAmtC, P: m_APInt(Res&: ShAmtOuterC)) && Op0 == Op1) { |
| 2560 | APInt Sum = *ShAmtOuterC + *ShAmtInnerC; |
| 2561 | APInt Modulo = Sum.urem(RHS: APInt(Sum.getBitWidth(), BitWidth)); |
| 2562 | if (Modulo.isZero()) |
| 2563 | return replaceInstUsesWith(I&: *II, V: InnerOp); |
| 2564 | Constant *ModuloC = ConstantInt::get(Ty, V: Modulo); |
| 2565 | return CallInst::Create(Func: cast<IntrinsicInst>(Val: Op0)->getCalledFunction(), |
| 2566 | Args: {InnerOp, InnerOp, ModuloC}); |
| 2567 | } |
| 2568 | } |
| 2569 | |
| 2570 | // fshl(X, X, Neg(Y)) --> fshr(X, X, Y) |
| 2571 | // fshr(X, X, Neg(Y)) --> fshl(X, X, Y) |
| 2572 | // if BitWidth is a power-of-2 |
| 2573 | Value *Y; |
| 2574 | if (Op0 == Op1 && isPowerOf2_32(Value: BitWidth) && |
| 2575 | match(V: II->getArgOperand(i: 2), P: m_Neg(V: m_Value(V&: Y)))) { |
| 2576 | Module *Mod = II->getModule(); |
| 2577 | Function *OppositeShift = Intrinsic::getOrInsertDeclaration( |
| 2578 | M: Mod, id: IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Tys: Ty); |
| 2579 | return CallInst::Create(Func: OppositeShift, Args: {Op0, Op1, Y}); |
| 2580 | } |
| 2581 | |
| 2582 | // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a |
| 2583 | // power-of-2 |
| 2584 | if (IID == Intrinsic::fshl && isPowerOf2_32(Value: BitWidth) && |
| 2585 | match(V: Op1, P: m_ZeroInt())) { |
| 2586 | Value *Op2 = II->getArgOperand(i: 2); |
| 2587 | Value *And = Builder.CreateAnd(LHS: Op2, RHS: ConstantInt::get(Ty, V: BitWidth - 1)); |
| 2588 | return BinaryOperator::CreateShl(V1: Op0, V2: And); |
| 2589 | } |
| 2590 | |
| 2591 | // Left or right might be masked. |
| 2592 | if (SimplifyDemandedInstructionBits(Inst&: *II)) |
| 2593 | return &CI; |
| 2594 | |
| 2595 | // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, |
| 2596 | // so only the low bits of the shift amount are demanded if the bitwidth is |
| 2597 | // a power-of-2. |
| 2598 | if (!isPowerOf2_32(Value: BitWidth)) |
| 2599 | break; |
| 2600 | APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth)); |
| 2601 | KnownBits Op2Known(BitWidth); |
| 2602 | if (SimplifyDemandedBits(I: II, OpNo: 2, DemandedMask: Op2Demanded, Known&: Op2Known)) |
| 2603 | return &CI; |
| 2604 | break; |
| 2605 | } |
| 2606 | case Intrinsic::ptrmask: { |
| 2607 | unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); |
| 2608 | KnownBits Known(BitWidth); |
| 2609 | if (SimplifyDemandedInstructionBits(Inst&: *II, Known)) |
| 2610 | return II; |
| 2611 | |
| 2612 | Value *InnerPtr, *InnerMask; |
| 2613 | bool Changed = false; |
| 2614 | // Combine: |
| 2615 | // (ptrmask (ptrmask p, A), B) |
| 2616 | // -> (ptrmask p, (and A, B)) |
| 2617 | if (match(V: II->getArgOperand(i: 0), |
| 2618 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ptrmask>(Op0: m_Value(V&: InnerPtr), |
| 2619 | Op1: m_Value(V&: InnerMask))))) { |
| 2620 | assert(II->getArgOperand(1)->getType() == InnerMask->getType() && |
| 2621 | "Mask types must match" ); |
| 2622 | // TODO: If InnerMask == Op1, we could copy attributes from inner |
| 2623 | // callsite -> outer callsite. |
| 2624 | Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: 1), RHS: InnerMask); |
| 2625 | replaceOperand(I&: CI, OpNum: 0, V: InnerPtr); |
| 2626 | replaceOperand(I&: CI, OpNum: 1, V: NewMask); |
| 2627 | Changed = true; |
| 2628 | } |
| 2629 | |
| 2630 | // See if we can deduce non-null. |
| 2631 | if (!CI.hasRetAttr(Kind: Attribute::NonNull) && |
| 2632 | (Known.isNonZero() || |
| 2633 | isKnownNonZero(V: II, Q: getSimplifyQuery().getWithInstruction(I: II)))) { |
| 2634 | CI.addRetAttr(Kind: Attribute::NonNull); |
| 2635 | Changed = true; |
| 2636 | } |
| 2637 | |
| 2638 | unsigned NewAlignmentLog = |
| 2639 | std::min(a: Value::MaxAlignmentExponent, |
| 2640 | b: std::min(a: BitWidth - 1, b: Known.countMinTrailingZeros())); |
| 2641 | // Known bits will capture if we had alignment information associated with |
| 2642 | // the pointer argument. |
| 2643 | if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) { |
| 2644 | CI.addRetAttr(Attr: Attribute::getWithAlignment( |
| 2645 | Context&: CI.getContext(), Alignment: Align(uint64_t(1) << NewAlignmentLog))); |
| 2646 | Changed = true; |
| 2647 | } |
| 2648 | if (Changed) |
| 2649 | return &CI; |
| 2650 | break; |
| 2651 | } |
| 2652 | case Intrinsic::uadd_with_overflow: |
| 2653 | case Intrinsic::sadd_with_overflow: { |
| 2654 | if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) |
| 2655 | return I; |
| 2656 | |
| 2657 | // Given 2 constant operands whose sum does not overflow: |
| 2658 | // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 |
| 2659 | // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 |
| 2660 | Value *X; |
| 2661 | const APInt *C0, *C1; |
| 2662 | Value *Arg0 = II->getArgOperand(i: 0); |
| 2663 | Value *Arg1 = II->getArgOperand(i: 1); |
| 2664 | bool IsSigned = IID == Intrinsic::sadd_with_overflow; |
| 2665 | bool HasNWAdd = IsSigned |
| 2666 | ? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))) |
| 2667 | : match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0))); |
| 2668 | if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) { |
| 2669 | bool Overflow; |
| 2670 | APInt NewC = |
| 2671 | IsSigned ? C1->sadd_ov(RHS: *C0, Overflow) : C1->uadd_ov(RHS: *C0, Overflow); |
| 2672 | if (!Overflow) |
| 2673 | return replaceInstUsesWith( |
| 2674 | I&: *II, V: Builder.CreateBinaryIntrinsic( |
| 2675 | ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC))); |
| 2676 | } |
| 2677 | break; |
| 2678 | } |
| 2679 | |
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::usub_with_overflow:
    // For these intrinsics only the generic overflow-intrinsic folds apply.
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;
    break;
| 2686 | |
  case Intrinsic::ssub_with_overflow: {
    // Apply the folds shared by all *_with_overflow intrinsics first.
    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
      return I;

    Constant *C;
    Value *Arg0 = II->getArgOperand(i: 0);
    Value *Arg1 = II->getArgOperand(i: 1);
    // Given a constant C that is not the minimum signed value
    // for an integer of a given bit width:
    //
    // ssubo X, C -> saddo X, -C
    //
    // (C must not be INT_MIN because negating INT_MIN overflows, which
    // would change the overflow flag's semantics.)
    if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      // Build a saddo call that is equivalent to the discovered
      // ssubo call.
      return replaceInstUsesWith(
          I&: *II, V: Builder.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow,
                                              LHS: Arg0, RHS: NegVal));
    }

    break;
  }
| 2709 | |
  case Intrinsic::uadd_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::ssub_sat: {
    SaturatingInst *SI = cast<SaturatingInst>(Val: II);
    Type *Ty = SI->getType();
    Value *Arg0 = SI->getLHS();
    Value *Arg1 = SI->getRHS();

    // Make use of known overflow information.
    OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
                                        LHS: Arg0, RHS: Arg1, CxtI: SI);
    switch (OR) {
    case OverflowResult::MayOverflow:
      break;
    case OverflowResult::NeverOverflows:
      // Saturation can never trigger, so lower to a plain add/sub that
      // carries the matching no-wrap flag.
      if (SI->isSigned())
        return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
      else
        return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
    case OverflowResult::AlwaysOverflowsLow: {
      // The operation always saturates at the low end; fold to the
      // type's minimum (signed or unsigned) value.
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
      return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
    }
    case OverflowResult::AlwaysOverflowsHigh: {
      // Likewise for saturation at the high end.
      unsigned BitWidth = Ty->getScalarSizeInBits();
      APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
      return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
    }
    }

    // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
    // which after that:
    // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
    // usub_sat((sub nuw C, A), C1) -> 0 otherwise
    Constant *C, *C1;
    Value *A;
    if (IID == Intrinsic::usub_sat &&
        match(V: Arg0, P: m_NUWSub(L: m_ImmConstant(C), R: m_Value(V&: A))) &&
        match(V: Arg1, P: m_ImmConstant(C&: C1))) {
      auto *NewC = Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: C, RHS: C1);
      auto *NewSub =
          Builder.CreateBinaryIntrinsic(ID: Intrinsic::usub_sat, LHS: NewC, RHS: A);
      return replaceInstUsesWith(I&: *SI, V: NewSub);
    }

    // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
    // (negating the minimum signed value would itself overflow)
    if (IID == Intrinsic::ssub_sat && match(V: Arg1, P: m_Constant(C)) &&
        C->isNotMinSignedValue()) {
      Value *NegVal = ConstantExpr::getNeg(C);
      return replaceInstUsesWith(
          I&: *II, V: Builder.CreateBinaryIntrinsic(
                     ID: Intrinsic::sadd_sat, LHS: Arg0, RHS: NegVal));
    }

    // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
    // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
    // if Val and Val2 have the same sign
    if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
      Value *X;
      const APInt *Val, *Val2;
      APInt NewVal;
      bool IsUnsigned =
          IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
      if (Other->getIntrinsicID() == IID &&
          match(V: Arg1, P: m_APInt(Res&: Val)) &&
          match(V: Other->getArgOperand(i: 0), P: m_Value(V&: X)) &&
          match(V: Other->getArgOperand(i: 1), P: m_APInt(Res&: Val2))) {
        if (IsUnsigned)
          // Unsigned: saturating-add the constants, which mirrors how the
          // nested intrinsic calls themselves would saturate.
          NewVal = Val->uadd_sat(RHS: *Val2);
        else if (Val->isNonNegative() == Val2->isNonNegative()) {
          bool Overflow;
          NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
          if (Overflow) {
            // Both adds together may add more than SignedMaxValue
            // without saturating the final result.
            break;
          }
        } else {
          // Cannot fold saturated addition with different signs.
          break;
        }

        return replaceInstUsesWith(
            I&: *II, V: Builder.CreateBinaryIntrinsic(
                       ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
      }
    }
    break;
  }
| 2801 | |
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {
    // Folds common to the whole FP min/max intrinsic family.
    Value *Arg0 = II->getArgOperand(i: 0);
    Value *Arg1 = II->getArgOperand(i: 1);
    Value *X, *Y;
    if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
        (Arg0->hasOneUse() || Arg1->hasOneUse())) {
      // If both operands are negated, invert the call and negate the result:
      // min(-X, -Y) --> -(max(X, Y))
      // max(-X, -Y) --> -(min(X, Y))
      Intrinsic::ID NewIID;
      switch (IID) {
      case Intrinsic::maxnum:
        NewIID = Intrinsic::minnum;
        break;
      case Intrinsic::minnum:
        NewIID = Intrinsic::maxnum;
        break;
      case Intrinsic::maximumnum:
        NewIID = Intrinsic::minimumnum;
        break;
      case Intrinsic::minimumnum:
        NewIID = Intrinsic::maximumnum;
        break;
      case Intrinsic::maximum:
        NewIID = Intrinsic::minimum;
        break;
      case Intrinsic::minimum:
        NewIID = Intrinsic::maximum;
        break;
      default:
        llvm_unreachable("unexpected intrinsic ID" );
      }
      Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
      Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
      FNeg->copyIRFlags(V: II);
      return FNeg;
    }

    // m(m(X, C2), C1) -> m(X, C)
    // Reassociate a nested call of the same intrinsic with two constants
    // into a single call against the folded constant.
    const APFloat *C1, *C2;
    if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
      if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
          ((match(V: M->getArgOperand(i: 0), P: m_Value(V&: X)) &&
            match(V: M->getArgOperand(i: 1), P: m_APFloat(Res&: C2))) ||
           (match(V: M->getArgOperand(i: 1), P: m_Value(V&: X)) &&
            match(V: M->getArgOperand(i: 0), P: m_APFloat(Res&: C2))))) {
        APFloat Res(0.0);
        switch (IID) {
        case Intrinsic::maxnum:
          Res = maxnum(A: *C1, B: *C2);
          break;
        case Intrinsic::minnum:
          Res = minnum(A: *C1, B: *C2);
          break;
        case Intrinsic::maximumnum:
          Res = maximumnum(A: *C1, B: *C2);
          break;
        case Intrinsic::minimumnum:
          Res = minimumnum(A: *C1, B: *C2);
          break;
        case Intrinsic::maximum:
          Res = maximum(A: *C1, B: *C2);
          break;
        case Intrinsic::minimum:
          Res = minimum(A: *C1, B: *C2);
          break;
        default:
          llvm_unreachable("unexpected intrinsic ID" );
        }
        // TODO: Conservatively intersecting FMF. If Res == C2, the transform
        //       was a simplification (so Arg0 and its original flags could
        //       propagate?)
        Value *V = Builder.CreateBinaryIntrinsic(
            ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res),
            FMFSource: FMFSource::intersect(A: II, B: M));
        return replaceInstUsesWith(I&: *II, V);
      }
    }

    // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
    // Narrowing is safe here: min/max selects one of its inputs, so
    // computing it in the narrower type then extending gives the same value.
    if (match(V: Arg0, P: m_FPExt(Op: m_Value(V&: X))) && match(V: Arg1, P: m_FPExt(Op: m_Value(V&: Y))) &&
        (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
        X->getType() == Y->getType()) {
      Value *NewCall =
          Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
      return new FPExtInst(NewCall, II->getType());
    }

    // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
    Constant *C;
    if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
        match(V: Arg1, P: m_ImmConstant(C))) {
      if (Constant *TruncC =
              getLosslessInvCast(C, InvCastTo: X->getType(), CastOp: Instruction::FPExt, DL)) {
        Value *NewCall =
            Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: TruncC, FMFSource: II, Name: II->getName());
        return new FPExtInst(NewCall, II->getType());
      }
    }

    // max X, -X --> fabs X
    // min X, -X --> -(fabs X)
    // TODO: Remove one-use limitation? That is obviously better for max,
    //       hence why we don't check for one-use for that. However,
    //       it would be an extra instruction for min (fnabs), but
    //       that is still likely better for analysis and codegen.
    auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
      if (match(V: Op0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Op1, P: m_Specific(V: X)))
        return Op0->hasOneUse() ||
               (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
                IID != Intrinsic::minimumnum);
      return false;
    };

    if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
      Value *R = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
      if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
          IID == Intrinsic::minimumnum)
        R = Builder.CreateFNegFMF(V: R, FMFSource: II);
      return replaceInstUsesWith(I&: *II, V: R);
    }

    break;
  }
  case Intrinsic::matrix_multiply: {
    // Optimize negation in matrix multiplication.

    // -A * -B -> A * B
    Value *A, *B;
    if (match(V: II->getArgOperand(i: 0), P: m_FNeg(X: m_Value(V&: A))) &&
        match(V: II->getArgOperand(i: 1), P: m_FNeg(X: m_Value(V&: B)))) {
      replaceOperand(I&: *II, OpNum: 0, V: A);
      replaceOperand(I&: *II, OpNum: 1, V: B);
      return II;
    }

    // Identify which operand (if any) is negated so we can decide where the
    // negation is cheapest to place, based on operand/result element counts.
    Value *Op0 = II->getOperand(i_nocapture: 0);
    Value *Op1 = II->getOperand(i_nocapture: 1);
    Value *OpNotNeg, *NegatedOp;
    unsigned NegatedOpArg, OtherOpArg;
    if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
      NegatedOp = Op0;
      NegatedOpArg = 0;
      OtherOpArg = 1;
    } else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
      NegatedOp = Op1;
      NegatedOpArg = 1;
      OtherOpArg = 0;
    } else
      // Multiplication doesn't have a negated operand.
      break;

    // Only optimize if the negated operand has only one use.
    if (!NegatedOp->hasOneUse())
      break;

    Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
    VectorType *RetTy = cast<VectorType>(Val: II->getType());
    VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
    VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
    ElementCount NegatedCount = NegatedOpTy->getElementCount();
    ElementCount OtherCount = OtherOpTy->getElementCount();
    ElementCount RetCount = RetTy->getElementCount();
    // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
    if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
        ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
      Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
      replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
      replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
      return II;
    }
    // (-A) * B -> -(A * B), if it is cheaper to negate the result
    if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
      SmallVector<Value *, 5> NewArgs(II->args());
      NewArgs[NegatedOpArg] = OpNotNeg;
      Instruction *NewMul =
          Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
      return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
    }
    break;
  }
  case Intrinsic::fmuladd: {
    // Try to simplify the underlying FMul.
    // fmuladd permits an intermediate rounding step, so the full FMul
    // simplification set is applicable before falling through to the
    // rounding-free fma folds below.
    if (Value *V =
            simplifyFMulInst(LHS: II->getArgOperand(i: 0), RHS: II->getArgOperand(i: 1),
                             FMF: II->getFastMathFlags(), Q: SQ.getWithInstruction(I: II)))
      return BinaryOperator::CreateFAddFMF(V1: V, V2: II->getArgOperand(i: 2),
                                           FMF: II->getFastMathFlags());

    [[fallthrough]];
  }
  case Intrinsic::fma: {
    // fma fneg(x), fneg(y), z -> fma x, y, z
    // The two negations in the product cancel out.
    Value *Src0 = II->getArgOperand(i: 0);
    Value *Src1 = II->getArgOperand(i: 1);
    Value *Src2 = II->getArgOperand(i: 2);
    Value *X, *Y;
    if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
      replaceOperand(I&: *II, OpNum: 0, V: X);
      replaceOperand(I&: *II, OpNum: 1, V: Y);
      return II;
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    // x * x is already non-negative, so the fabs calls are redundant.
    if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
        match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
      replaceOperand(I&: *II, OpNum: 0, V: X);
      replaceOperand(I&: *II, OpNum: 1, V: X);
      return II;
    }

    // Try to simplify the underlying FMul. We can only apply simplifications
    // that do not require rounding.
    if (Value *V = simplifyFMAFMul(LHS: Src0, RHS: Src1, FMF: II->getFastMathFlags(),
                                   Q: SQ.getWithInstruction(I: II)))
      return BinaryOperator::CreateFAddFMF(V1: V, V2: Src2, FMF: II->getFastMathFlags());

    // fma x, y, 0 -> fmul x, y
    // This is always valid for -0.0, but requires nsz for +0.0 as
    // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
    if (match(V: Src2, P: m_NegZeroFP()) ||
        (match(V: Src2, P: m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
      return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II);

    // fma x, -1.0, y -> fsub y, x
    if (match(V: Src1, P: m_SpecificFP(V: -1.0)))
      return BinaryOperator::CreateFSubFMF(V1: Src2, V2: Src0, FMFSource: II);

    break;
  }
  case Intrinsic::copysign: {
    Value *Mag = II->getArgOperand(i: 0), *Sign = II->getArgOperand(i: 1);
    // If the sign bit of the Sign operand is statically known, the call
    // collapses to fabs (positive) or fneg(fabs) (negative).
    if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
            V: Sign, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
      if (*KnownSignBit) {
        // If we know that the sign argument is negative, reduce to FNABS:
        // copysign Mag, -Sign --> fneg (fabs Mag)
        Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
        return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
      }

      // If we know that the sign argument is positive, reduce to FABS:
      // copysign Mag, +Sign --> fabs Mag
      Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Mag, FMFSource: II);
      return replaceInstUsesWith(I&: *II, V: Fabs);
    }

    // Propagate sign argument through nested calls:
    // copysign Mag, (copysign ?, X) --> copysign Mag, X
    Value *X;
    if (match(V: Sign, P: m_Intrinsic<Intrinsic::copysign>(Op0: m_Value(), Op1: m_Value(V&: X)))) {
      Value *CopySign =
          Builder.CreateCopySign(LHS: Mag, RHS: X, FMFSource: FMFSource::intersect(A: II, B: Sign));
      return replaceInstUsesWith(I&: *II, V: CopySign);
    }

    // Clear sign-bit of constant magnitude:
    // copysign -MagC, X --> copysign MagC, X
    // TODO: Support constant folding for fabs
    const APFloat *MagC;
    if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) {
      APFloat PosMagC = *MagC;
      PosMagC.clearSign();
      return replaceOperand(I&: *II, OpNum: 0, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC));
    }

    // Peek through changes of magnitude's sign-bit. This call rewrites those:
    // copysign (fabs X), Sign --> copysign X, Sign
    // copysign (fneg X), Sign --> copysign X, Sign
    if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) || match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
      return replaceOperand(I&: *II, OpNum: 0, V: X);

    Type *SignEltTy = Sign->getType()->getScalarType();

    // If Sign is an element-wise bitcast from an integer, only its sign bit
    // is demanded; let SimplifyDemandedBits shrink the integer computation.
    Value *CastSrc;
    if (match(V: Sign,
              P: m_OneUse(SubPattern: m_ElementWiseBitCast(Op: m_OneUse(SubPattern: m_Value(V&: CastSrc))))) &&
        CastSrc->getType()->isIntOrIntVectorTy() &&
        APFloat::hasSignBitInMSB(SignEltTy->getFltSemantics())) {
      KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
      if (SimplifyDemandedBits(I: cast<Instruction>(Val: Sign), Op: 0,
                               DemandedMask: APInt::getSignMask(BitWidth: Known.getBitWidth()), Known,
                               Q: SQ))
        return II;
    }

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond, *TVal, *FVal;
    Value *Arg = II->getArgOperand(i: 0);
    Value *X;
    // fabs (-X) --> fabs (X)
    if (match(V: Arg, P: m_FNeg(X: m_Value(V&: X)))) {
      CallInst *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: II);
      return replaceInstUsesWith(I&: CI, V: Fabs);
    }

    if (match(V: Arg, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
      // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
      // With one use, a single constant arm is enough (the non-constant arm
      // just grows by one fabs); otherwise require both arms constant.
      if (Arg->hasOneUse() ? (isa<Constant>(Val: TVal) || isa<Constant>(Val: FVal))
                           : (isa<Constant>(Val: TVal) && isa<Constant>(Val: FVal))) {
        CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
        CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
        SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
        FastMathFlags FMF1 = II->getFastMathFlags();
        FastMathFlags FMF2 = cast<SelectInst>(Val: Arg)->getFastMathFlags();
        // nsz from the select cannot be kept: fabs canonicalizes the sign.
        FMF2.setNoSignedZeros(false);
        SI->setFastMathFlags(FMF1 | FMF2);
        return SI;
      }
      // fabs (select Cond, -FVal, FVal) --> fabs FVal
      if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
        return replaceOperand(I&: *II, OpNum: 0, V: FVal);
      // fabs (select Cond, TVal, -TVal) --> fabs TVal
      if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
        return replaceOperand(I&: *II, OpNum: 0, V: TVal);
    }

    Value *Magnitude, *Sign;
    if (match(V: II->getArgOperand(i: 0),
              P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
      // fabs (copysign x, y) -> (fabs x)
      CallInst *AbsSign =
          Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Magnitude, FMFSource: II);
      return replaceInstUsesWith(I&: *II, V: AbsSign);
    }

    // Fall through to share the narrowing fold with the rounding intrinsics.
    [[fallthrough]];
  }
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::trunc: {
    Value *ExtSrc;
    if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) {
      // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
      Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II);
      return new FPExtInst(NarrowII, II->getType());
    }
    break;
  }
  case Intrinsic::cos:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::cosh: {
    // Even functions: the sign of the input is irrelevant.
    Value *X, *Sign;
    Value *Src = II->getArgOperand(i: 0);
    if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) || match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) ||
        match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) {
      // f(-x) --> f(x)
      // f(fabs(x)) --> f(x)
      // f(copysign(x, y)) --> f(x)
      // for f in {cos, cosh}
      return replaceOperand(I&: *II, OpNum: 0, V: X);
    }
    break;
  }
  case Intrinsic::sin:
  case Intrinsic::amdgcn_sin:
  case Intrinsic::sinh:
  case Intrinsic::tan:
  case Intrinsic::tanh: {
    // Odd functions: a negation can be hoisted out of the call.
    Value *X;
    if (match(V: II->getArgOperand(i: 0), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
      // f(-x) --> -f(x)
      // for f in {sin, sinh, tan, tanh}
      Value *NewFunc = Builder.CreateUnaryIntrinsic(ID: IID, V: X, FMFSource: II);
      return UnaryOperator::CreateFNegFMF(Op: NewFunc, FMFSource: II);
    }
    break;
  }
  case Intrinsic::ldexp: {
    // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
    //
    // The danger is if the first ldexp would overflow to infinity or underflow
    // to zero, but the combined exponent avoids it. We ignore this with
    // reassoc.
    //
    // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
    // it would just double down on the overflow/underflow which would occur
    // anyway.
    //
    // TODO: Could do better if we had range tracking for the input value
    // exponent. Also could broaden sign check to cover == 0 case.
    Value *Src = II->getArgOperand(i: 0);
    Value *Exp = II->getArgOperand(i: 1);

    uint64_t ConstExp;
    if (match(V: Exp, P: m_ConstantInt(V&: ConstExp))) {
      // ldexp(x, K) -> fmul x, 2^K
      const fltSemantics &FPTy =
          Src->getType()->getScalarType()->getFltSemantics();

      APFloat Scaled = scalbn(X: APFloat::getOne(Sem: FPTy), Exp: static_cast<int>(ConstExp),
                              RM: APFloat::rmNearestTiesToEven);
      if (!Scaled.isZero() && !Scaled.isInfinity()) {
        // Skip overflow and underflow cases.
        Constant *FPConst = ConstantFP::get(Ty: Src->getType(), V: Scaled);
        return BinaryOperator::CreateFMulFMF(V1: Src, V2: FPConst, FMFSource: II);
      }
    }

    Value *InnerSrc;
    Value *InnerExp;
    if (match(V: Src, P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ldexp>(
                         Op0: m_Value(V&: InnerSrc), Op1: m_Value(V&: InnerExp)))) &&
        Exp->getType() == InnerExp->getType()) {
      FastMathFlags FMF = II->getFastMathFlags();
      FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags();

      // Fold when reassociation is allowed on both calls, or when the two
      // exponents provably have the same sign (see comment above).
      if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
          signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, SQ: SQ.getWithInstruction(I: II))) {
        // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
        // width.
        Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp);
        II->setArgOperand(i: 1, v: NewExp);
        II->setFastMathFlags(InnerFlags); // Or the inner flags.
        return replaceOperand(I&: *II, OpNum: 0, V: InnerSrc);
      }
    }

    // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
    // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
    Value *ExtSrc;
    if (match(V: Exp, P: m_ZExt(Op: m_Value(V&: ExtSrc))) &&
        ExtSrc->getType()->getScalarSizeInBits() == 1) {
      // zext i1 gives exponent 0 or 1, i.e. a scale of 1.0 or 2.0.
      Value *Select =
          Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 2.0),
                               False: ConstantFP::get(Ty: II->getType(), V: 1.0));
      return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
    }
    if (match(V: Exp, P: m_SExt(Op: m_Value(V&: ExtSrc))) &&
        ExtSrc->getType()->getScalarSizeInBits() == 1) {
      // sext i1 gives exponent 0 or -1, i.e. a scale of 1.0 or 0.5.
      Value *Select =
          Builder.CreateSelect(C: ExtSrc, True: ConstantFP::get(Ty: II->getType(), V: 0.5),
                               False: ConstantFP::get(Ty: II->getType(), V: 1.0));
      return BinaryOperator::CreateFMulFMF(V1: Src, V2: Select, FMFSource: II);
    }

    // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
    // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
    //
    // TODO: If we cared, should insert a canonicalize for x
    Value *SelectCond, *SelectLHS, *SelectRHS;
    if (match(V: II->getArgOperand(i: 1),
              P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: SelectCond), L: m_Value(V&: SelectLHS),
                                 R: m_Value(V&: SelectRHS))))) {
      Value *NewLdexp = nullptr;
      Value *Select = nullptr;
      if (match(V: SelectRHS, P: m_ZeroInt())) {
        NewLdexp = Builder.CreateLdexp(Src, Exp: SelectLHS, FMFSource: II);
        Select = Builder.CreateSelect(C: SelectCond, True: NewLdexp, False: Src);
      } else if (match(V: SelectLHS, P: m_ZeroInt())) {
        NewLdexp = Builder.CreateLdexp(Src, Exp: SelectRHS, FMFSource: II);
        Select = Builder.CreateSelect(C: SelectCond, True: Src, False: NewLdexp);
      }

      if (NewLdexp) {
        Select->takeName(V: II);
        return replaceInstUsesWith(I&: *II, V: Select);
      }
    }

    break;
  }
  case Intrinsic::ptrauth_auth:
  case Intrinsic::ptrauth_resign: {
    // We don't support this optimization on intrinsic calls with deactivation
    // symbols, which are represented using operand bundles.
    if (II->hasOperandBundles())
      break;

    // (sign|resign) + (auth|resign) can be folded by omitting the middle
    // sign+auth component if the key and discriminator match.
    bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
    Value *Ptr = II->getArgOperand(i: 0);
    Value *Key = II->getArgOperand(i: 1);
    Value *Disc = II->getArgOperand(i: 2);

    // AuthKey will be the key we need to end up authenticating against in
    // whatever we replace this sequence with.
    Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
    if (const auto *CI = dyn_cast<CallBase>(Val: Ptr)) {
      // We don't support this optimization on intrinsic calls with deactivation
      // symbols, which are represented using operand bundles.
      if (CI->hasOperandBundles())
        break;

      BasePtr = CI->getArgOperand(i: 0);
      if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
        // The inner sign must use the same key/discriminator that this call
        // authenticates against, otherwise the pair doesn't cancel.
        if (CI->getArgOperand(i: 1) != Key || CI->getArgOperand(i: 2) != Disc)
          break;
      } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
        // For an inner resign, its signing key/disc (args 3/4) must match;
        // its auth key/disc (args 1/2) become what we must still auth with.
        if (CI->getArgOperand(i: 3) != Key || CI->getArgOperand(i: 4) != Disc)
          break;
        AuthKey = CI->getArgOperand(i: 1);
        AuthDisc = CI->getArgOperand(i: 2);
      } else
        break;
    } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Val: Ptr)) {
      // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
      // our purposes, so check for that too.
      const auto *CPA = dyn_cast<ConstantPtrAuth>(Val: PtrToInt->getOperand(i_nocapture: 0));
      if (!CPA || !CPA->isKnownCompatibleWith(Key, Discriminator: Disc, DL))
        break;

      // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
      if (NeedSign && isa<ConstantInt>(Val: II->getArgOperand(i: 4))) {
        auto *SignKey = cast<ConstantInt>(Val: II->getArgOperand(i: 3));
        auto *SignDisc = cast<ConstantInt>(Val: II->getArgOperand(i: 4));
        auto *Null = ConstantPointerNull::get(T: Builder.getPtrTy());
        auto *NewCPA = ConstantPtrAuth::get(Ptr: CPA->getPointer(), Key: SignKey,
                                            Disc: SignDisc, /*AddrDisc=*/Null,
                                            /*DeactivationSymbol=*/Null);
        replaceInstUsesWith(
            I&: *II, V: ConstantExpr::getPointerCast(C: NewCPA, Ty: II->getType()));
        return eraseInstFromFunction(I&: *II);
      }

      // auth(ptrauth(p,k,d),k,d) -> p
      BasePtr = Builder.CreatePtrToInt(V: CPA->getPointer(), DestTy: II->getType());
    } else
      break;

    // Decide which (if any) intrinsic remains after cancelling the matched
    // sign/auth pair in the middle of the sequence.
    unsigned NewIntrin;
    if (AuthKey && NeedSign) {
      // resign(0,1) + resign(1,2) = resign(0, 2)
      NewIntrin = Intrinsic::ptrauth_resign;
    } else if (AuthKey) {
      // resign(0,1) + auth(1) = auth(0)
      NewIntrin = Intrinsic::ptrauth_auth;
    } else if (NeedSign) {
      // sign(0) + resign(0, 1) = sign(1)
      NewIntrin = Intrinsic::ptrauth_sign;
    } else {
      // sign(0) + auth(0) = nop
      replaceInstUsesWith(I&: *II, V: BasePtr);
      return eraseInstFromFunction(I&: *II);
    }

    SmallVector<Value *, 4> CallArgs;
    CallArgs.push_back(Elt: BasePtr);
    if (AuthKey) {
      CallArgs.push_back(Elt: AuthKey);
      CallArgs.push_back(Elt: AuthDisc);
    }

    if (NeedSign) {
      CallArgs.push_back(Elt: II->getArgOperand(i: 3));
      CallArgs.push_back(Elt: II->getArgOperand(i: 4));
    }

    Function *NewFn =
        Intrinsic::getOrInsertDeclaration(M: II->getModule(), id: NewIntrin);
    return CallInst::Create(Func: NewFn, Args: CallArgs);
  }
  // NEON table lookup (tbl) and table lookup-with-extension (tbx)
  // intrinsics share one simplification helper; only the IsExtension
  // flag differs.
  case Intrinsic::arm_neon_vtbl1:
  case Intrinsic::arm_neon_vtbl2:
  case Intrinsic::arm_neon_vtbl3:
  case Intrinsic::arm_neon_vtbl4:
  case Intrinsic::aarch64_neon_tbl1:
  case Intrinsic::aarch64_neon_tbl2:
  case Intrinsic::aarch64_neon_tbl3:
  case Intrinsic::aarch64_neon_tbl4:
    return simplifyNeonTbl(II&: *II, IC&: *this, /*IsExtension=*/false);
  case Intrinsic::arm_neon_vtbx1:
  case Intrinsic::arm_neon_vtbx2:
  case Intrinsic::arm_neon_vtbx3:
  case Intrinsic::arm_neon_vtbx4:
  case Intrinsic::aarch64_neon_tbx1:
  case Intrinsic::aarch64_neon_tbx2:
  case Intrinsic::aarch64_neon_tbx3:
  case Intrinsic::aarch64_neon_tbx4:
    return simplifyNeonTbl(II&: *II, IC&: *this, /*IsExtension=*/true);
| 3384 | |
  // Widening multiplies: smull/umull multiply the (sign/zero-extended)
  // operands into a vector of double-width elements.
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(i: 0);
    Value *Arg1 = II->getArgOperand(i: 1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Val: Arg0) || isa<ConstantAggregateZero>(Val: Arg1)) {
      return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    // The unsigned variants zero-extend; the signed ones sign-extend.
    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
                 IID == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(Val: II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) {
        // Both constant: extend to the result width and fold to a plain mul.
        Value *V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /*isSigned=*/!Zext);
        Value *V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /*isSigned=*/!Zext);
        return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(a&: Arg0, b&: Arg1);
    }

    // Handle mul by one: the result is just the extension of the other
    // operand.
    if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }
  // AES encrypt/decrypt rounds. The hardware instruction XORs the key into
  // the data first, so an explicit prior XOR can be folded into the
  // intrinsic's operands when the other operand is known zero.
  case Intrinsic::arm_neon_aesd:
  case Intrinsic::arm_neon_aese:
  case Intrinsic::aarch64_crypto_aesd:
  case Intrinsic::aarch64_crypto_aese:
  case Intrinsic::aarch64_sve_aesd:
  case Intrinsic::aarch64_sve_aese: {
    Value *DataArg = II->getArgOperand(i: 0);
    Value *KeyArg = II->getArgOperand(i: 1);

    // Accept zero on either operand.
    if (!match(V: KeyArg, P: m_ZeroInt()))
      std::swap(a&: KeyArg, b&: DataArg);

    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
    Value *Data, *Key;
    if (match(V: KeyArg, P: m_ZeroInt()) &&
        match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) {
      // aes(xor(Data, Key), 0) -> aes(Data, Key)
      replaceOperand(I&: *II, OpNum: 0, V: Data);
      replaceOperand(I&: *II, OpNum: 1, V: Key);
      return II;
    }
    break;
  }
  // Variable NEON shifts: delegated to a shared folding helper.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    return foldNeonShift(II, IC&: *this);
  case Intrinsic::hexagon_V6_vandvrt:
  case Intrinsic::hexagon_V6_vandvrt_128B: {
    // Simplify Q -> V -> Q conversion: vandvrt(vandqrt(Q, Bytes), Mask) -> Q
    // when the round trip provably preserves every predicate bit.
    if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) {
      Intrinsic::ID ID0 = Op0->getIntrinsicID();
      if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
          ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
        break;
      Value *Bytes = Op0->getArgOperand(i: 1), *Mask = II->getArgOperand(i: 1);
      // Known-one bits of the two scalar operands (low 32 bits are what
      // matter here; both are i32 values broadcast across the vector).
      uint64_t Bytes1 = computeKnownBits(V: Bytes, CxtI: Op0).One.getZExtValue();
      uint64_t Mask1 = computeKnownBits(V: Mask, CxtI: II).One.getZExtValue();
      // Check if every byte has common bits in Bytes and Mask.
      uint64_t C = Bytes1 & Mask1;
      if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
        return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: 0));
    }
    break;
  }
  case Intrinsic::stackrestore: {
    // Classification of the instructions that may sit between a stacksave
    // and a stackrestore; anything stack-relevant blocks removal.
    enum class ClassifyResult {
      None,
      Alloca,
      StackRestore,
      CallWithSideEffects,
    };
    auto Classify = [](const Instruction *I) {
      if (isa<AllocaInst>(Val: I))
        return ClassifyResult::Alloca;

      if (auto *CI = dyn_cast<CallInst>(Val: I)) {
        if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) {
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return ClassifyResult::StackRestore;

          if (II->mayHaveSideEffects())
            return ClassifyResult::CallWithSideEffects;
        } else {
          // Consider all non-intrinsic calls to be side effects
          return ClassifyResult::CallWithSideEffects;
        }
      }

      return ClassifyResult::None;
    };

    // If the stacksave and the stackrestore are in the same BB, and there is
    // no intervening call, alloca, or stackrestore of a different stacksave,
    // remove the restore. This can happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave &&
          SS->getParent() == II->getParent()) {
        // Walk forward from the stacksave to this stackrestore.
        BasicBlock::iterator BI(SS);
        bool CannotRemove = false;
        for (++BI; &*BI != II; ++BI) {
          switch (Classify(&*BI)) {
          case ClassifyResult::None:
            // So far so good, look at next instructions.
            break;

          case ClassifyResult::StackRestore:
            // If we found an intervening stackrestore for a different
            // stacksave, we can't remove the stackrestore. Otherwise, continue.
            if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: 0) != SS)
              CannotRemove = true;
            break;

          case ClassifyResult::Alloca:
          case ClassifyResult::CallWithSideEffects:
            // If we found an alloca, a non-intrinsic call, or an intrinsic
            // call with side effects, we can't remove the stackrestore.
            CannotRemove = true;
            break;
          }
          if (CannotRemove)
            break;
        }

        if (!CannotRemove)
          return eraseInstFromFunction(I&: CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI(II);
    Instruction *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      switch (Classify(&*BI)) {
      case ClassifyResult::None:
        // So far so good, look at next instructions.
        break;

      case ClassifyResult::StackRestore:
        // If there is a stackrestore below this one, remove this one.
        return eraseInstFromFunction(I&: CI);

      case ClassifyResult::Alloca:
      case ClassifyResult::CallWithSideEffects:
        // If we found an alloca, a non-intrinsic call, or an intrinsic call
        // with side effects (such as llvm.stacksave and llvm.read_register),
        // we can't remove the stack restore.
        CannotRemove = true;
        break;
      }
      if (CannotRemove)
        break;
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(Val: TI) || isa<ResumeInst>(Val: TI)))
      return eraseInstFromFunction(I&: CI);
    break;
  }
  case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid access which is possible
    // even for empty lifetime range.
    if (II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
        II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
        II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress) ||
        II->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemTag))
      break;

    // Drop a lifetime_start/lifetime_end pair that encloses no interesting
    // instructions (the helper erases both markers when that holds).
    if (removeTriviallyEmptyRange(EndI&: *II, IC&: *this, IsStart: [](const IntrinsicInst &I) {
          return I.getIntrinsicID() == Intrinsic::lifetime_start;
        }))
      return nullptr;
    break;
  case Intrinsic::assume: {
    Value *IIOperand = II->getArgOperand(i: 0);
    // Preserve any operand bundles so they survive assume splitting below.
    SmallVector<OperandBundleDef, 4> OpBundles;
    II->getOperandBundlesAsDefs(Defs&: OpBundles);

    /// This will remove the boolean Condition from the assume given as
    /// argument and remove the assume if it becomes useless.
    /// Always returns nullptr for use as a return value.
    auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
      assert(isa<AssumeInst>(Assume));
      if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II)))
        return eraseInstFromFunction(I&: CI);
      // Neutralize the condition: assume(true) keeps only the bundles.
      replaceUse(U&: II->getOperandUse(i: 0), NewValue: ConstantInt::getTrue(Context&: II->getContext()));
      return nullptr;
    };
    // Remove an assume if it is followed by an identical assume.
    // TODO: Do we need this? Unless there are conflicting assumptions, the
    // computeKnownBits(IIOperand) below here eliminates redundant assumes.
    Instruction *Next = II->getNextNode();
    if (match(V: Next, P: m_Intrinsic<Intrinsic::assume>(Op0: m_Specific(V: IIOperand))))
      return RemoveConditionFromAssume(Next);

    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    FunctionType *AssumeIntrinsicTy = II->getFunctionType();
    Value *AssumeIntrinsic = II->getCalledOperand();
    Value *A, *B;
    if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) {
      // The bundles are attached to the first of the two new assumes.
      Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles,
                         Name: II->getName());
      Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName());
      return eraseInstFromFunction(I&: *II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
      Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
                         Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName());
      Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
                         Args: Builder.CreateNot(V: B), Name: II->getName());
      return eraseInstFromFunction(I&: *II);
    }

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    Instruction *LHS;
    if (match(V: IIOperand, P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Instruction(I&: LHS),
                                         R: m_Zero())) &&
        LHS->getOpcode() == Instruction::Load &&
        LHS->getType()->isPointerTy() &&
        isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) {
      MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: {});
      LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD);
      LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD);
      return RemoveConditionFromAssume(II);

      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // Simplify or remove individual operand bundles where possible.
    for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
      OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx);

      // Separate storage assumptions apply to the underlying allocations, not
      // any particular pointer within them. When evaluating the hints for AA
      // purposes we getUnderlyingObject them; by precomputing the answers here
      // we can avoid having to do so repeatedly there.
      if (OBU.getTagName() == "separate_storage") {
        assert(OBU.Inputs.size() == 2);
        auto MaybeSimplifyHint = [&](const Use &U) {
          Value *Hint = U.get();
          // Not having a limit is safe because InstCombine removes unreachable
          // code.
          Value *UnderlyingObject = getUnderlyingObject(V: Hint, /*MaxLookup*/ 0);
          if (Hint != UnderlyingObject)
            replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject);
        };
        MaybeSimplifyHint(OBU.Inputs[0]);
        MaybeSimplifyHint(OBU.Inputs[1]);
      }

      // Try to remove redundant alignment assumptions.
      if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
        RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
            Assume&: *cast<AssumeInst>(Val: II), Idx: II->arg_size() + Idx);
        if (!RK || RK.AttrKind != Attribute::Alignment ||
            !isPowerOf2_64(Value: RK.ArgValue) || !isa<ConstantInt>(Val: RK.IRArgValue))
          continue;

        // Remove align 1 bundles; they don't add any useful information.
        if (RK.ArgValue == 1)
          return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());

        // Don't try to remove align assumptions for pointers derived from
        // arguments. We might lose information if the function gets inline and
        // the align argument attribute disappears.
        Value *UO = getUnderlyingObject(V: RK.WasOn);
        if (!UO || isa<Argument>(Val: UO))
          continue;

        // Compute known bits for the pointer, passing nullptr as context to
        // avoid computeKnownBits using the assumption we are about to remove
        // for reasoning.
        KnownBits Known = computeKnownBits(V: RK.WasOn, /*CtxI=*/CxtI: nullptr);
        unsigned TZ = std::min(a: Known.countMinTrailingZeros(),
                               b: Value::MaxAlignmentExponent);
        // Keep the bundle unless the alignment is already implied.
        if ((1ULL << TZ) < RK.ArgValue)
          continue;
        return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
      }

      // Drop nonnull bundles on pointers already known to be non-zero.
      if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
        RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
            Assume&: *cast<AssumeInst>(Val: II), Idx: II->arg_size() + Idx);
        if (!RK || RK.AttrKind != Attribute::NonNull ||
            !isKnownNonZero(V: RK.WasOn,
                            Q: getSimplifyQuery().getWithInstruction(I: II)))
          continue;
        return CallBase::removeOperandBundle(CB: II, ID: OBU.getTagID());
      }
    }

    // Convert nonnull assume like:
    // %A = icmp ne i32* %PTR, null
    // call void @llvm.assume(i1 %A)
    // into
    // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
    if (EnableKnowledgeRetention &&
        match(V: IIOperand,
              P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_NE, L: m_Value(V&: A), R: m_Zero())) &&
        A->getType()->isPointerTy()) {
      if (auto *Replacement = buildAssumeFromKnowledge(
              Knowledge: {RetainedKnowledge{Attribute::NonNull, 0, A}}, CtxI: Next, AC: &AC, DT: &DT)) {

        Replacement->insertBefore(InsertPos: Next->getIterator());
        AC.registerAssumption(CI: Replacement);
        return RemoveConditionFromAssume(II);
      }
    }

    // Convert alignment assume like:
    // %B = ptrtoint i32* %A to i64
    // %C = and i64 %B, Constant
    // %D = icmp eq i64 %C, 0
    // call void @llvm.assume(i1 %D)
    // into
    // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
    uint64_t AlignMask = 1;
    if ((match(V: IIOperand, P: m_Not(V: m_Trunc(Op: m_Value(V&: A)))) ||
         match(V: IIOperand,
               P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ,
                               L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)),
                               R: m_Zero())))) {
      if (isPowerOf2_64(Value: AlignMask + 1)) {
        uint64_t Offset = 0;
        // Peel off an optional constant offset before the ptrtoint/addr.
        match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset)));
        if (match(V: A, P: m_PtrToIntOrAddr(Op: m_Value(V&: A)))) {
          /// Note: this doesn't preserve the offset information but merges
          /// offset and alignment.
          /// TODO: we can generate a GEP instead of merging the alignment with
          /// the offset.
          RetainedKnowledge RK{Attribute::Alignment,
                               MinAlign(A: Offset, B: AlignMask + 1), A};
          if (auto *Replacement =
                  buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) {

            Replacement->insertAfter(InsertPos: II->getIterator());
            AC.registerAssumption(CI: Replacement);
          }
          return RemoveConditionFromAssume(II);
        }
      }
    }

    /// Canonicalize Knowledge in operand bundles.
    if (EnableKnowledgeRetention && II->hasOperandBundles()) {
      for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
        auto &BOI = II->bundle_op_info_begin()[Idx];
        RetainedKnowledge RK =
            llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI);
        if (BOI.End - BOI.Begin > 2)
          continue; // Prevent reducing knowledge in an align with offset since
                    // extracting a RetainedKnowledge from them loses offset
                    // information
        RetainedKnowledge CanonRK =
            llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK,
                                            AC: &getAssumptionCache(),
                                            DT: &getDominatorTree());
        if (CanonRK == RK)
          continue;
        if (!CanonRK) {
          // Knowledge simplified away entirely: drop the (droppable) use.
          if (BOI.End - BOI.Begin > 0) {
            Worklist.pushValue(V: II->op_begin()[BOI.Begin]);
            Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]);
          }
          continue;
        }
        assert(RK.AttrKind == CanonRK.AttrKind);
        // Rewrite the bundle operands in place with the canonical knowledge.
        if (BOI.End - BOI.Begin > 0)
          II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
        if (BOI.End - BOI.Begin > 1)
          II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
              Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue));
        if (RK.WasOn)
          Worklist.pushValue(V: RK.WasOn);
        return II;
      }
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    KnownBits Known(1);
    computeKnownBits(V: IIOperand, Known, CxtI: II);
    if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II)))
      return eraseInstFromFunction(I&: *II);

    // assume(false) is unreachable.
    if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) {
      CreateNonTerminatorUnreachable(InsertAt: II);
      return eraseInstFromFunction(I&: *II);
    }

    // Update the cache of affected values for this assumption (we might be
    // here because we just simplified the condition).
    AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II));
    break;
  }
  case Intrinsic::experimental_guard: {
    // Is this guard followed by another guard? We scan forward over a small
    // fixed window of instructions to handle common cases with conditions
    // computed between guards.
    Instruction *NextInst = II->getNextNode();
    for (unsigned i = 0; i < GuardWideningWindow; i++) {
      // Note: Using context-free form to avoid compile time blow up
      if (!isSafeToSpeculativelyExecute(I: NextInst))
        break;
      NextInst = NextInst->getNextNode();
    }
    Value *NextCond = nullptr;
    if (match(V: NextInst,
              P: m_Intrinsic<Intrinsic::experimental_guard>(Op0: m_Value(V&: NextCond)))) {
      Value *CurrCond = II->getArgOperand(i: 0);

      // Remove a guard that it is immediately preceded by an identical guard.
      // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
      if (CurrCond != NextCond) {
        // Hoist the speculatable instructions between the guards above this
        // guard so the combined condition's operands dominate it.
        Instruction *MoveI = II->getNextNode();
        while (MoveI != NextInst) {
          auto *Temp = MoveI;
          MoveI = MoveI->getNextNode();
          Temp->moveBefore(InsertPos: II->getIterator());
        }
        replaceOperand(I&: *II, OpNum: 0, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond));
      }
      eraseInstFromFunction(I&: *NextInst);
      return II;
    }
    break;
  }
  case Intrinsic::vector_insert: {
    Value *Vec = II->getArgOperand(i: 0);
    Value *SubVec = II->getArgOperand(i: 1);
    Value *Idx = II->getArgOperand(i: 2);
    auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts)
        return replaceInstUsesWith(I&: CI, V: SubVec);

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(Elt: i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(Elt: PoisonMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask);

      // Build the final mask: take the prefix of Vec up to IdxN, then the
      // SubVec elements (which live at indices >= DstNumElts in the shuffle's
      // second operand), then the suffix of Vec.
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(Elt: i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(Elt: i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(Elt: i);

      Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask);
      return replaceInstUsesWith(I&: CI, V: Shuffle);
    }
    break;
  }
| 3885 | case Intrinsic::vector_extract: { |
| 3886 | Value *Vec = II->getArgOperand(i: 0); |
| 3887 | Value *Idx = II->getArgOperand(i: 1); |
| 3888 | |
| 3889 | Type *ReturnType = II->getType(); |
| 3890 | // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx), |
| 3891 | // ExtractIdx) |
| 3892 | unsigned = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
| 3893 | Value *InsertTuple, *InsertIdx, *InsertValue; |
| 3894 | if (match(V: Vec, P: m_Intrinsic<Intrinsic::vector_insert>(Op0: m_Value(V&: InsertTuple), |
| 3895 | Op1: m_Value(V&: InsertValue), |
| 3896 | Op2: m_Value(V&: InsertIdx))) && |
| 3897 | InsertValue->getType() == ReturnType) { |
| 3898 | unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue(); |
| 3899 | // Case where we get the same index right after setting it. |
| 3900 | // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) --> |
| 3901 | // InsertValue |
| 3902 | if (ExtractIdx == Index) |
| 3903 | return replaceInstUsesWith(I&: CI, V: InsertValue); |
| 3904 | // If we are getting a different index than what was set in the |
| 3905 | // insert.vector intrinsic. We can just set the input tuple to the one up |
| 3906 | // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue, |
| 3907 | // InsertIndex), ExtractIndex) |
| 3908 | // --> extract.vector(InsertTuple, ExtractIndex) |
| 3909 | else |
| 3910 | return replaceOperand(I&: CI, OpNum: 0, V: InsertTuple); |
| 3911 | } |
| 3912 | |
| 3913 | auto *DstTy = dyn_cast<VectorType>(Val: ReturnType); |
| 3914 | auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType()); |
| 3915 | |
| 3916 | if (DstTy && VecTy) { |
| 3917 | auto DstEltCnt = DstTy->getElementCount(); |
| 3918 | auto VecEltCnt = VecTy->getElementCount(); |
| 3919 | unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
| 3920 | |
| 3921 | // Extracting the entirety of Vec is a nop. |
| 3922 | if (DstEltCnt == VecTy->getElementCount()) { |
| 3923 | replaceInstUsesWith(I&: CI, V: Vec); |
| 3924 | return eraseInstFromFunction(I&: CI); |
| 3925 | } |
| 3926 | |
| 3927 | // Only canonicalize to shufflevector if the destination vector and |
| 3928 | // Vec are fixed vectors. |
| 3929 | if (VecEltCnt.isScalable() || DstEltCnt.isScalable()) |
| 3930 | break; |
| 3931 | |
| 3932 | SmallVector<int, 8> Mask; |
| 3933 | for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i) |
| 3934 | Mask.push_back(Elt: IdxN + i); |
| 3935 | |
| 3936 | Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask); |
| 3937 | return replaceInstUsesWith(I&: CI, V: Shuffle); |
| 3938 | } |
| 3939 | break; |
| 3940 | } |
  case Intrinsic::experimental_vp_reverse: {
    Value *X;
    Value *Vec = II->getArgOperand(i: 0);
    Value *Mask = II->getArgOperand(i: 1);
    // Only handle the unmasked (all-ones) form.
    if (!match(V: Mask, P: m_AllOnes()))
      break;
    Value *EVL = II->getArgOperand(i: 2);
    // TODO: Canonicalize experimental.vp.reverse after unop/binops?
    // rev(unop rev(X)) --> unop X
    // Requires the inner reverse to be unmasked with the same EVL, and
    // single-use so no extra instructions are left behind.
    if (match(V: Vec,
              P: m_OneUse(SubPattern: m_UnOp(X: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
                  Op0: m_Value(V&: X), Op1: m_AllOnes(), Op2: m_Specific(V: EVL)))))) {
      auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
          Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(),
          InsertBefore: II->getIterator());
      return replaceInstUsesWith(I&: CI, V: NewUnOp);
    }
    break;
  }
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_and: {
    // Canonicalize logical or/and reductions:
    // Or reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp ne iReduxWidth %val, 0
    // And reduction for i1 is represented as:
    // %val = bitcast <ReduxWidth x i1> to iReduxWidth
    // %res = cmp eq iReduxWidth %val, -1 (all ones)
    Value *Arg = II->getArgOperand(i: 0);
    Value *Vect;

    if (Value *NewOp =
            simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
      replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
      return II;
    }

    // Look through a possible zext/sext of an i1 vector.
    if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
      if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
        if (FTy->getElementType() == Builder.getInt1Ty()) {
          Value *Res = Builder.CreateBitCast(
              V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
          if (IID == Intrinsic::vector_reduce_and) {
            Res = Builder.CreateICmpEQ(
                LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
          } else {
            assert(IID == Intrinsic::vector_reduce_or &&
                   "Expected or reduction.");
            Res = Builder.CreateIsNotNull(Arg: Res);
          }
          // Re-apply the original extension to the scalar result.
          if (Arg != Vect)
            Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
                                     DestTy: II->getType());
          return replaceInstUsesWith(I&: CI, V: Res);
        }
    }
    [[fallthrough]];
  }
  case Intrinsic::vector_reduce_add: {
    // Guard on IID because the or/and case above falls through here.
    if (IID == Intrinsic::vector_reduce_add) {
      // Convert vector_reduce_add(ZExt(<n x i1>)) to
      // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(SExt(<n x i1>)) to
      // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
      // Convert vector_reduce_add(<n x i1>) to
      // Trunc(ctpop(bitcast <n x i1> to in)).
      Value *Arg = II->getArgOperand(i: 0);
      Value *Vect;

      if (Value *NewOp =
              simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
        replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
        return II;
      }

      // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
      if (Value *Splat = getSplatValue(V: Arg)) {
        ElementCount VecToReduceCount =
            cast<VectorType>(Val: Arg->getType())->getElementCount();
        if (VecToReduceCount.isFixed()) {
          unsigned VectorSize = VecToReduceCount.getFixedValue();
          return BinaryOperator::CreateMul(
              V1: Splat,
              V2: ConstantInt::get(Ty: Splat->getType(), V: VectorSize, /*IsSigned=*/false,
                                /*ImplicitTrunc=*/true));
        }
      }

      if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
        if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
          if (FTy->getElementType() == Builder.getInt1Ty()) {
            // Popcount of the bitcast mask counts the true lanes.
            Value *V = Builder.CreateBitCast(
                V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
            Value *Res = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V);
            Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
            // SExt'd i1 lanes contribute -1 each, so negate the count.
            if (Arg != Vect &&
                cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
              Res = Builder.CreateNeg(V: Res);
            return replaceInstUsesWith(I&: CI, V: Res);
          }
      }
    }
    [[fallthrough]];
  }
  case Intrinsic::vector_reduce_xor: {
    // Guard on IID because earlier reduction cases fall through here.
    if (IID == Intrinsic::vector_reduce_xor) {
      // Exclusive disjunction reduction over the vector with
      // (potentially-extended) i1 element type is actually a
      // (potentially-extended) arithmetic `add` reduction over the original
      // non-extended value:
      // vector_reduce_xor(?ext(<n x i1>))
      // -->
      // ?ext(vector_reduce_add(<n x i1>))
      Value *Arg = II->getArgOperand(i: 0);
      Value *Vect;

      if (Value *NewOp =
              simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
        replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp);
        return II;
      }

      if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
        if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType()))
          if (VTy->getElementType() == Builder.getInt1Ty()) {
            Value *Res = Builder.CreateAddReduce(Src: Vect);
            // Re-apply the original extension to the scalar result.
            if (Arg != Vect)
              Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
                                       DestTy: II->getType());
            return replaceInstUsesWith(I&: CI, V: Res);
          }
      }
    }
    [[fallthrough]];
  }
| 4077 | case Intrinsic::vector_reduce_mul: { |
| 4078 | if (IID == Intrinsic::vector_reduce_mul) { |
| 4079 | // Multiplicative reduction over the vector with (potentially-extended) |
| 4080 | // i1 element type is actually a (potentially zero-extended) |
| 4081 | // logical `and` reduction over the original non-extended value: |
| 4082 | // vector_reduce_mul(?ext(<n x i1>)) |
| 4083 | // --> |
| 4084 | // zext(vector_reduce_and(<n x i1>)) |
| 4085 | Value *Arg = II->getArgOperand(i: 0); |
| 4086 | Value *Vect; |
| 4087 | |
| 4088 | if (Value *NewOp = |
| 4089 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 4090 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 4091 | return II; |
| 4092 | } |
| 4093 | |
| 4094 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 4095 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 4096 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 4097 | Value *Res = Builder.CreateAndReduce(Src: Vect); |
| 4098 | Res = Builder.CreateZExt(V: Res, DestTy: II->getType()); |
| 4099 | return replaceInstUsesWith(I&: CI, V: Res); |
| 4100 | } |
| 4101 | } |
| 4102 | } |
| 4103 | [[fallthrough]]; |
| 4104 | } |
| 4105 | case Intrinsic::vector_reduce_umin: |
| 4106 | case Intrinsic::vector_reduce_umax: { |
| 4107 | if (IID == Intrinsic::vector_reduce_umin || |
| 4108 | IID == Intrinsic::vector_reduce_umax) { |
| 4109 | // UMin/UMax reduction over the vector with (potentially-extended) |
| 4110 | // i1 element type is actually a (potentially-extended) |
| 4111 | // logical `and`/`or` reduction over the original non-extended value: |
| 4112 | // vector_reduce_u{min,max}(?ext(<n x i1>)) |
| 4113 | // --> |
| 4114 | // ?ext(vector_reduce_{and,or}(<n x i1>)) |
| 4115 | Value *Arg = II->getArgOperand(i: 0); |
| 4116 | Value *Vect; |
| 4117 | |
| 4118 | if (Value *NewOp = |
| 4119 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 4120 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 4121 | return II; |
| 4122 | } |
| 4123 | |
| 4124 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 4125 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 4126 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 4127 | Value *Res = IID == Intrinsic::vector_reduce_umin |
| 4128 | ? Builder.CreateAndReduce(Src: Vect) |
| 4129 | : Builder.CreateOrReduce(Src: Vect); |
| 4130 | if (Arg != Vect) |
| 4131 | Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res, |
| 4132 | DestTy: II->getType()); |
| 4133 | return replaceInstUsesWith(I&: CI, V: Res); |
| 4134 | } |
| 4135 | } |
| 4136 | } |
| 4137 | [[fallthrough]]; |
| 4138 | } |
| 4139 | case Intrinsic::vector_reduce_smin: |
| 4140 | case Intrinsic::vector_reduce_smax: { |
| 4141 | if (IID == Intrinsic::vector_reduce_smin || |
| 4142 | IID == Intrinsic::vector_reduce_smax) { |
| 4143 | // SMin/SMax reduction over the vector with (potentially-extended) |
| 4144 | // i1 element type is actually a (potentially-extended) |
| 4145 | // logical `and`/`or` reduction over the original non-extended value: |
| 4146 | // vector_reduce_s{min,max}(<n x i1>) |
| 4147 | // --> |
| 4148 | // vector_reduce_{or,and}(<n x i1>) |
| 4149 | // and |
| 4150 | // vector_reduce_s{min,max}(sext(<n x i1>)) |
| 4151 | // --> |
| 4152 | // sext(vector_reduce_{or,and}(<n x i1>)) |
| 4153 | // and |
| 4154 | // vector_reduce_s{min,max}(zext(<n x i1>)) |
| 4155 | // --> |
| 4156 | // zext(vector_reduce_{and,or}(<n x i1>)) |
| 4157 | Value *Arg = II->getArgOperand(i: 0); |
| 4158 | Value *Vect; |
| 4159 | |
| 4160 | if (Value *NewOp = |
| 4161 | simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) { |
| 4162 | replaceUse(U&: II->getOperandUse(i: 0), NewValue: NewOp); |
| 4163 | return II; |
| 4164 | } |
| 4165 | |
| 4166 | if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) { |
| 4167 | if (auto *VTy = dyn_cast<VectorType>(Val: Vect->getType())) |
| 4168 | if (VTy->getElementType() == Builder.getInt1Ty()) { |
| 4169 | Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd; |
| 4170 | if (Arg != Vect) |
| 4171 | ExtOpc = cast<CastInst>(Val: Arg)->getOpcode(); |
| 4172 | Value *Res = ((IID == Intrinsic::vector_reduce_smin) == |
| 4173 | (ExtOpc == Instruction::CastOps::ZExt)) |
| 4174 | ? Builder.CreateAndReduce(Src: Vect) |
| 4175 | : Builder.CreateOrReduce(Src: Vect); |
| 4176 | if (Arg != Vect) |
| 4177 | Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType()); |
| 4178 | return replaceInstUsesWith(I&: CI, V: Res); |
| 4179 | } |
| 4180 | } |
| 4181 | } |
| 4182 | [[fallthrough]]; |
| 4183 | } |
| 4184 | case Intrinsic::vector_reduce_fmax: |
| 4185 | case Intrinsic::vector_reduce_fmin: |
| 4186 | case Intrinsic::vector_reduce_fadd: |
| 4187 | case Intrinsic::vector_reduce_fmul: { |
| 4188 | bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd && |
| 4189 | IID != Intrinsic::vector_reduce_fmul) || |
| 4190 | II->hasAllowReassoc(); |
| 4191 | const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || |
| 4192 | IID == Intrinsic::vector_reduce_fmul) |
| 4193 | ? 1 |
| 4194 | : 0; |
| 4195 | Value *Arg = II->getArgOperand(i: ArgIdx); |
| 4196 | if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) { |
| 4197 | replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: NewOp); |
| 4198 | return nullptr; |
| 4199 | } |
| 4200 | break; |
| 4201 | } |
| 4202 | case Intrinsic::is_fpclass: { |
| 4203 | if (Instruction *I = foldIntrinsicIsFPClass(II&: *II)) |
| 4204 | return I; |
| 4205 | break; |
| 4206 | } |
| 4207 | case Intrinsic::threadlocal_address: { |
| 4208 | Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: 0), DL, CxtI: II, AC: &AC, DT: &DT); |
| 4209 | MaybeAlign Align = II->getRetAlign(); |
| 4210 | if (MinAlign > Align.valueOrOne()) { |
| 4211 | II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign)); |
| 4212 | return II; |
| 4213 | } |
| 4214 | break; |
| 4215 | } |
| 4216 | case Intrinsic::frexp: { |
| 4217 | Value *X; |
| 4218 | // The first result is idempotent with the added complication of the struct |
| 4219 | // return, and the second result is zero because the value is already |
| 4220 | // normalized. |
| 4221 | if (match(V: II->getArgOperand(i: 0), P: m_ExtractValue<0>(V: m_Value(V&: X)))) { |
| 4222 | if (match(V: X, P: m_Intrinsic<Intrinsic::frexp>(Op0: m_Value()))) { |
| 4223 | X = Builder.CreateInsertValue( |
| 4224 | Agg: X, Val: Constant::getNullValue(Ty: II->getType()->getStructElementType(N: 1)), |
| 4225 | Idxs: 1); |
| 4226 | return replaceInstUsesWith(I&: *II, V: X); |
| 4227 | } |
| 4228 | } |
| 4229 | break; |
| 4230 | } |
| 4231 | case Intrinsic::get_active_lane_mask: { |
| 4232 | const APInt *Op0, *Op1; |
| 4233 | if (match(V: II->getOperand(i_nocapture: 0), P: m_StrictlyPositive(V&: Op0)) && |
| 4234 | match(V: II->getOperand(i_nocapture: 1), P: m_APInt(Res&: Op1))) { |
| 4235 | Type *OpTy = II->getOperand(i_nocapture: 0)->getType(); |
| 4236 | return replaceInstUsesWith( |
| 4237 | I&: *II, V: Builder.CreateIntrinsic( |
| 4238 | RetTy: II->getType(), ID: Intrinsic::get_active_lane_mask, |
| 4239 | Args: {Constant::getNullValue(Ty: OpTy), |
| 4240 | ConstantInt::get(Ty: OpTy, V: Op1->usub_sat(RHS: *Op0))})); |
| 4241 | } |
| 4242 | break; |
| 4243 | } |
| 4244 | case Intrinsic::experimental_get_vector_length: { |
| 4245 | // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes |
| 4246 | unsigned BitWidth = |
| 4247 | std::max(a: II->getArgOperand(i: 0)->getType()->getScalarSizeInBits(), |
| 4248 | b: II->getType()->getScalarSizeInBits()); |
| 4249 | ConstantRange Cnt = |
| 4250 | computeConstantRangeIncludingKnownBits(V: II->getArgOperand(i: 0), ForSigned: false, |
| 4251 | SQ: SQ.getWithInstruction(I: II)) |
| 4252 | .zextOrTrunc(BitWidth); |
| 4253 | ConstantRange MaxLanes = cast<ConstantInt>(Val: II->getArgOperand(i: 1)) |
| 4254 | ->getValue() |
| 4255 | .zextOrTrunc(width: Cnt.getBitWidth()); |
| 4256 | if (cast<ConstantInt>(Val: II->getArgOperand(i: 2))->isOne()) |
| 4257 | MaxLanes = MaxLanes.multiply( |
| 4258 | Other: getVScaleRange(F: II->getFunction(), BitWidth: Cnt.getBitWidth())); |
| 4259 | |
| 4260 | if (Cnt.icmp(Pred: CmpInst::ICMP_ULE, Other: MaxLanes)) |
| 4261 | return replaceInstUsesWith( |
| 4262 | I&: *II, V: Builder.CreateZExtOrTrunc(V: II->getArgOperand(i: 0), DestTy: II->getType())); |
| 4263 | return nullptr; |
| 4264 | } |
| 4265 | default: { |
| 4266 | // Handle target specific intrinsics |
| 4267 | std::optional<Instruction *> V = targetInstCombineIntrinsic(II&: *II); |
| 4268 | if (V) |
| 4269 | return *V; |
| 4270 | break; |
| 4271 | } |
| 4272 | } |
| 4273 | |
| 4274 | // Try to fold intrinsic into select/phi operands. This is legal if: |
| 4275 | // * The intrinsic is speculatable. |
| 4276 | // * The operand is one of the following: |
| 4277 | // - a phi. |
| 4278 | // - a select with a scalar condition. |
| 4279 | // - a select with a vector condition and II is not a cross lane operation. |
| 4280 | if (isSafeToSpeculativelyExecuteWithVariableReplaced(I: &CI)) { |
| 4281 | for (Value *Op : II->args()) { |
| 4282 | if (auto *Sel = dyn_cast<SelectInst>(Val: Op)) { |
| 4283 | bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy(); |
| 4284 | if (IsVectorCond && |
| 4285 | (!isNotCrossLaneOperation(I: II) || !II->getType()->isVectorTy())) |
| 4286 | continue; |
| 4287 | // Don't replace a scalar select with a more expensive vector select if |
| 4288 | // we can't simplify both arms of the select. |
| 4289 | bool SimplifyBothArms = |
| 4290 | !Op->getType()->isVectorTy() && II->getType()->isVectorTy(); |
| 4291 | if (Instruction *R = FoldOpIntoSelect( |
| 4292 | Op&: *II, SI: Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms)) |
| 4293 | return R; |
| 4294 | } |
| 4295 | if (auto *Phi = dyn_cast<PHINode>(Val: Op)) |
| 4296 | if (Instruction *R = foldOpIntoPhi(I&: *II, PN: Phi)) |
| 4297 | return R; |
| 4298 | } |
| 4299 | } |
| 4300 | |
| 4301 | if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) |
| 4302 | return Shuf; |
| 4303 | |
| 4304 | if (Value *Reverse = foldReversedIntrinsicOperands(II)) |
| 4305 | return replaceInstUsesWith(I&: *II, V: Reverse); |
| 4306 | |
| 4307 | if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(IC&: *this, II)) |
| 4308 | return replaceInstUsesWith(I&: *II, V: Res); |
| 4309 | |
| 4310 | // Some intrinsics (like experimental_gc_statepoint) can be used in invoke |
| 4311 | // context, so it is handled in visitCallBase and we should trigger it. |
| 4312 | return visitCallBase(Call&: *II); |
| 4313 | } |
| 4314 | |
| 4315 | // Fence instruction simplification |
| 4316 | Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) { |
| 4317 | auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNode()); |
| 4318 | // This check is solely here to handle arbitrary target-dependent syncscopes. |
| 4319 | // TODO: Can remove if does not matter in practice. |
| 4320 | if (NFI && FI.isIdenticalTo(I: NFI)) |
| 4321 | return eraseInstFromFunction(I&: FI); |
| 4322 | |
| 4323 | // Returns true if FI1 is identical or stronger fence than FI2. |
| 4324 | auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) { |
| 4325 | auto FI1SyncScope = FI1->getSyncScopeID(); |
| 4326 | // Consider same scope, where scope is global or single-thread. |
| 4327 | if (FI1SyncScope != FI2->getSyncScopeID() || |
| 4328 | (FI1SyncScope != SyncScope::System && |
| 4329 | FI1SyncScope != SyncScope::SingleThread)) |
| 4330 | return false; |
| 4331 | |
| 4332 | return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering()); |
| 4333 | }; |
| 4334 | if (NFI && isIdenticalOrStrongerFence(NFI, &FI)) |
| 4335 | return eraseInstFromFunction(I&: FI); |
| 4336 | |
| 4337 | if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNode())) |
| 4338 | if (isIdenticalOrStrongerFence(PFI, &FI)) |
| 4339 | return eraseInstFromFunction(I&: FI); |
| 4340 | return nullptr; |
| 4341 | } |
| 4342 | |
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  // Invokes share all foldable structure with plain calls; defer to the
  // common call-site handling in visitCallBase.
  return visitCallBase(Call&: II);
}
| 4347 | |
// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  // Callbr sites are handled by the common call-site logic as well.
  return visitCallBase(Call&: CBI);
}
| 4352 | |
/// Rewrite a call carrying the "modular-format" function attribute to target
/// the modular implementation function instead, emitting llvm.reloc.none
/// references for only those format "aspects" this call site actually needs.
/// Returns the replacement call, or nullptr if the rewrite does not apply.
///
/// The attribute value is a comma-separated list whose fields 2..4 are the
/// 1-based index of the first format argument, the modular function name, and
/// the implementation name; any remaining fields name supported aspects.
/// NOTE(review): the meaning of the first two fields is not visible here —
/// see the TODO below.
static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
  if (!CI->hasFnAttr(Kind: "modular-format" ))
    return nullptr;

  SmallVector<StringRef> Args(
      llvm::split(Str: CI->getFnAttr(Kind: "modular-format" ).getValueAsString(), Separator: ','));
  // TODO: Make use of the first two arguments
  unsigned FirstArgIdx;
  [[maybe_unused]] bool Error;
  Error = Args[2].getAsInteger(Radix: 10, Result&: FirstArgIdx);
  assert(!Error && "invalid first arg index" );
  // Convert from the attribute's 1-based encoding to a 0-based arg index.
  --FirstArgIdx;
  StringRef FnName = Args[3];
  StringRef ImplName = Args[4];
  ArrayRef<StringRef> AllAspects = ArrayRef<StringRef>(Args).drop_front(N: 5);

  // With no aspects listed there is nothing to specialize on.
  if (AllAspects.empty())
    return nullptr;

  // Determine which of the advertised aspects this call site requires.
  SmallVector<StringRef> NeededAspects;
  for (StringRef Aspect : AllAspects) {
    if (Aspect == "float" ) {
      // Floating-point support is only needed if some format argument (from
      // FirstArgIdx onward) has a floating-point type.
      if (llvm::any_of(
              Range: llvm::make_range(x: std::next(x: CI->arg_begin(), n: FirstArgIdx),
                               y: CI->arg_end()),
              P: [](Value *V) { return V->getType()->isFloatingPointTy(); }))
        NeededAspects.push_back(Elt: "float" );
    } else {
      // Unknown aspects are always considered to be needed.
      NeededAspects.push_back(Elt: Aspect);
    }
  }

  // If every aspect is needed anyway, the rewrite gains nothing.
  if (NeededAspects.size() == AllAspects.size())
    return nullptr;

  // Clone the call, retarget it at the modular function (same signature,
  // same attributes minus "modular-format" so it is not rewritten again).
  Module *M = CI->getModule();
  LLVMContext &Ctx = M->getContext();
  Function *Callee = CI->getCalledFunction();
  FunctionCallee ModularFn = M->getOrInsertFunction(
      Name: FnName, T: Callee->getFunctionType(),
      AttributeList: Callee->getAttributes().removeFnAttribute(C&: Ctx, Kind: "modular-format" ));
  CallInst *New = cast<CallInst>(Val: CI->clone());
  New->setCalledFunction(ModularFn);
  New->removeFnAttr(Kind: "modular-format" );
  B.Insert(I: New);

  // Emit a no-op relocation naming "<impl>_<aspect>" for one aspect —
  // presumably so the linker retains the corresponding implementation piece;
  // confirm against the llvm.reloc.none semantics.
  const auto ReferenceAspect = [&](StringRef Aspect) {
    SmallString<20> Name = ImplName;
    Name += '_';
    Name += Aspect;
    Function *RelocNoneFn =
        Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::reloc_none);
    B.CreateCall(Callee: RelocNoneFn,
                 Args: {MetadataAsValue::get(Context&: Ctx, MD: MDString::get(Context&: Ctx, Str: Name))});
  };

  // Sort so the emitted references have a deterministic order.
  llvm::sort(C&: NeededAspects);
  for (StringRef Request : NeededAspects)
    ReferenceAspect(Request);

  return New;
}
| 4416 | |
| 4417 | Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { |
| 4418 | if (!CI->getCalledFunction()) return nullptr; |
| 4419 | |
| 4420 | // Skip optimizing notail and musttail calls so |
| 4421 | // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants. |
| 4422 | // LibCallSimplifier::optimizeCall should try to preserve tail calls though. |
| 4423 | if (CI->isMustTailCall() || CI->isNoTailCall()) |
| 4424 | return nullptr; |
| 4425 | |
| 4426 | auto InstCombineRAUW = [this](Instruction *From, Value *With) { |
| 4427 | replaceInstUsesWith(I&: *From, V: With); |
| 4428 | }; |
| 4429 | auto InstCombineErase = [this](Instruction *I) { |
| 4430 | eraseInstFromFunction(I&: *I); |
| 4431 | }; |
| 4432 | LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI, |
| 4433 | InstCombineRAUW, InstCombineErase); |
| 4434 | if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) { |
| 4435 | ++NumSimplified; |
| 4436 | return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With); |
| 4437 | } |
| 4438 | if (Value *With = optimizeModularFormat(CI, B&: Builder)) { |
| 4439 | ++NumSimplified; |
| 4440 | return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With); |
| 4441 | } |
| 4442 | |
| 4443 | return nullptr; |
| 4444 | } |
| 4445 | |
| 4446 | static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) { |
| 4447 | // Strip off at most one level of pointer casts, looking for an alloca. This |
| 4448 | // is good enough in practice and simpler than handling any number of casts. |
| 4449 | Value *Underlying = TrampMem->stripPointerCasts(); |
| 4450 | if (Underlying != TrampMem && |
| 4451 | (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) |
| 4452 | return nullptr; |
| 4453 | if (!isa<AllocaInst>(Val: Underlying)) |
| 4454 | return nullptr; |
| 4455 | |
| 4456 | IntrinsicInst *InitTrampoline = nullptr; |
| 4457 | for (User *U : TrampMem->users()) { |
| 4458 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U); |
| 4459 | if (!II) |
| 4460 | return nullptr; |
| 4461 | if (II->getIntrinsicID() == Intrinsic::init_trampoline) { |
| 4462 | if (InitTrampoline) |
| 4463 | // More than one init_trampoline writes to this value. Give up. |
| 4464 | return nullptr; |
| 4465 | InitTrampoline = II; |
| 4466 | continue; |
| 4467 | } |
| 4468 | if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) |
| 4469 | // Allow any number of calls to adjust.trampoline. |
| 4470 | continue; |
| 4471 | return nullptr; |
| 4472 | } |
| 4473 | |
| 4474 | // No call to init.trampoline found. |
| 4475 | if (!InitTrampoline) |
| 4476 | return nullptr; |
| 4477 | |
| 4478 | // Check that the alloca is being used in the expected way. |
| 4479 | if (InitTrampoline->getOperand(i_nocapture: 0) != TrampMem) |
| 4480 | return nullptr; |
| 4481 | |
| 4482 | return InitTrampoline; |
| 4483 | } |
| 4484 | |
/// Walk backwards from AdjustTramp toward the start of its basic block,
/// looking for an llvm.init.trampoline that writes TrampMem. The search is
/// abandoned at the first intervening instruction that may write to memory,
/// since it could clobber the trampoline storage. Returns the matching
/// init.trampoline or nullptr.
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find a
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
                            E = AdjustTramp->getParent()->begin();
       I != E;) {
    // Pre-decrement so each step inspects the instruction *before* the
    // current position; AdjustTramp itself is never examined.
    Instruction *Inst = &*--I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(i_nocapture: 0) == TrampMem)
        return II;
    // Any potential memory write in between could invalidate the trampoline.
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  // Reached the block entry without finding a matching init.trampoline.
  return nullptr;
}
| 4502 | |
| 4503 | // Given a call to llvm.adjust.trampoline, find and return the corresponding |
| 4504 | // call to llvm.init.trampoline if the call to the trampoline can be optimized |
| 4505 | // to a direct call to a function. Otherwise return NULL. |
| 4506 | static IntrinsicInst *findInitTrampoline(Value *Callee) { |
| 4507 | Callee = Callee->stripPointerCasts(); |
| 4508 | IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee); |
| 4509 | if (!AdjustTramp || |
| 4510 | AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) |
| 4511 | return nullptr; |
| 4512 | |
| 4513 | Value *TrampMem = AdjustTramp->getOperand(i_nocapture: 0); |
| 4514 | |
| 4515 | if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem)) |
| 4516 | return IT; |
| 4517 | if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem)) |
| 4518 | return IT; |
| 4519 | return nullptr; |
| 4520 | } |
| 4521 | |
/// Fold calls whose callee is produced by a ptrauth.sign/ptrauth.resign
/// intrinsic, matched against the call's own "ptrauth" operand bundle:
///   call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
///   call(ptrauth.sign(p)),   ["ptrauth"()] -> call p
/// provided the bundle's key/discriminator agree with the intrinsic's
/// operands. Returns the rewritten call, or nullptr.
Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
  // The callee must be a no-op inttoptr cast of an intrinsic result.
  const Value *Callee = Call.getCalledOperand();
  const auto *IPC = dyn_cast<IntToPtrInst>(Val: Callee);
  if (!IPC || !IPC->isNoopCast(DL))
    return nullptr;

  const auto *II = dyn_cast<IntrinsicInst>(Val: IPC->getOperand(i_nocapture: 0));
  if (!II)
    return nullptr;

  Intrinsic::ID IIID = II->getIntrinsicID();
  if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
    return nullptr;

  // Isolate the ptrauth bundle from the others.
  std::optional<OperandBundleUse> PtrAuthBundleOrNone;
  SmallVector<OperandBundleDef, 2> NewBundles;
  for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
    OperandBundleUse Bundle = Call.getOperandBundleAt(Index: BI);
    if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
      PtrAuthBundleOrNone = Bundle;
    else
      NewBundles.emplace_back(Args&: Bundle);
  }

  // Without a ptrauth bundle there is nothing to match against.
  if (!PtrAuthBundleOrNone)
    return nullptr;

  Value *NewCallee = nullptr;
  switch (IIID) {
  // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
  // assuming the call bundle and the sign operands match.
  case Intrinsic::ptrauth_resign: {
    // Resign result key should match bundle.
    if (II->getOperand(i_nocapture: 3) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;
    // Resign result discriminator should match bundle.
    if (II->getOperand(i_nocapture: 4) != PtrAuthBundleOrNone->Inputs[1])
      return nullptr;

    // Resign input (auth) key should also match: we can't change the key on
    // the new call we're generating, because we don't know what keys are valid.
    if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;

    // The new call authenticates with the resign's input key/discriminator.
    Value *NewBundleOps[] = {II->getOperand(i_nocapture: 1), II->getOperand(i_nocapture: 2)};
    NewBundles.emplace_back(Args: "ptrauth" , Args&: NewBundleOps);
    NewCallee = II->getOperand(i_nocapture: 0);
    break;
  }

  // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
  // assuming the call bundle and the sign operands match.
  // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
  case Intrinsic::ptrauth_sign: {
    // Sign key should match bundle.
    if (II->getOperand(i_nocapture: 1) != PtrAuthBundleOrNone->Inputs[0])
      return nullptr;
    // Sign discriminator should match bundle.
    if (II->getOperand(i_nocapture: 2) != PtrAuthBundleOrNone->Inputs[1])
      return nullptr;
    NewCallee = II->getOperand(i_nocapture: 0);
    break;
  }
  default:
    llvm_unreachable("unexpected intrinsic ID" );
  }

  if (!NewCallee)
    return nullptr;

  // Rebuild the call against the raw (unsigned) callee, carrying over every
  // bundle except the original ptrauth bundle (replaced above for resign,
  // dropped entirely for sign).
  NewCallee = Builder.CreateBitOrPointerCast(V: NewCallee, DestTy: Callee->getType());
  CallBase *NewCall = CallBase::Create(CB: &Call, Bundles: NewBundles);
  NewCall->setCalledOperand(NewCallee);
  return NewCall;
}
| 4598 | |
/// Fold an authenticated call to a ptrauth constant into a direct call:
///   call(ptrauth(f)), ["ptrauth"()] -> call f
/// when the call's "ptrauth" bundle matches the constant's key/discriminator.
/// Returns the rewritten call, or nullptr.
Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
  auto *CPA = dyn_cast<ConstantPtrAuth>(Val: Call.getCalledOperand());
  if (!CPA)
    return nullptr;

  auto *CalleeF = dyn_cast<Function>(Val: CPA->getPointer());
  // If the ptrauth constant isn't based on a function pointer, bail out.
  if (!CalleeF)
    return nullptr;

  // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
  auto PAB = Call.getOperandBundle(ID: LLVMContext::OB_ptrauth);
  if (!PAB)
    return nullptr;

  // Bundle layout: Inputs[0] is the key (a constant int per the bundle's
  // verifier-enforced form), Inputs[1] the discriminator.
  auto *Key = cast<ConstantInt>(Val: PAB->Inputs[0]);
  Value *Discriminator = PAB->Inputs[1];

  // If the bundle doesn't match, this is probably going to fail to auth.
  if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
    return nullptr;

  // If the bundle matches the constant, proceed in making this a direct call.
  auto *NewCall = CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_ptrauth);
  NewCall->setCalledOperand(CalleeF);
  return NewCall;
}
| 4626 | |
/// Annotate an allocation call site with dereferenceability and alignment
/// return attributes inferred from its allocation size/alignment arguments.
/// Returns true if any attribute was added or strengthened.
bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
                                            const TargetLibraryInfo *TLI) {
  // Note: We only handle cases which can't be driven from generic attributes
  // here. So, for example, nonnull and noalias (which are common properties
  // of some allocation functions) are expected to be handled via annotation
  // of the respective allocator declaration with generic attributes.
  bool Changed = false;

  // Only pointer-returning calls can carry these return attributes.
  if (!Call.getType()->isPointerTy())
    return Changed;

  std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
  if (Size && *Size != 0) {
    // TODO: We really should just emit deref_or_null here and then
    // let the generic inference code combine that with nonnull.
    if (Call.hasRetAttr(Kind: Attribute::NonNull)) {
      // Changed only if the dereferenceable attribute wasn't already present.
      Changed = !Call.hasRetAttr(Kind: Attribute::Dereferenceable);
      Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
          Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
    } else {
      // Without nonnull, the weaker dereferenceable_or_null form is used.
      Changed = !Call.hasRetAttr(Kind: Attribute::DereferenceableOrNull);
      Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
          Context&: Call.getContext(), Bytes: Size->getLimitedValue()));
    }
  }

  // Add alignment attribute if alignment is a power of two constant.
  Value *Alignment = getAllocAlignment(V: &Call, TLI);
  if (!Alignment)
    return Changed;

  ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
  if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
    uint64_t AlignmentVal = AlignOpC->getZExtValue();
    if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
      Align ExistingAlign = Call.getRetAlign().valueOrOne();
      Align NewAlign = Align(AlignmentVal);
      // Only strengthen: never replace a larger known alignment.
      if (NewAlign > ExistingAlign) {
        Call.addRetAttr(
            Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
        Changed = true;
      }
    }
  }
  return Changed;
}
| 4673 | |
| 4674 | /// Improvements for call, callbr and invoke instructions. |
| 4675 | Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { |
| 4676 | bool Changed = annotateAnyAllocSite(Call, TLI: &TLI); |
| 4677 | |
| 4678 | // Mark any parameters that are known to be non-null with the nonnull |
| 4679 | // attribute. This is helpful for inlining calls to functions with null |
| 4680 | // checks on their arguments. |
| 4681 | SmallVector<unsigned, 4> ArgNos; |
| 4682 | unsigned ArgNo = 0; |
| 4683 | |
| 4684 | for (Value *V : Call.args()) { |
| 4685 | if (V->getType()->isPointerTy()) { |
| 4686 | // Simplify the nonnull operand if the parameter is known to be nonnull. |
| 4687 | // Otherwise, try to infer nonnull for it. |
| 4688 | bool HasDereferenceable = Call.getParamDereferenceableBytes(i: ArgNo) > 0; |
| 4689 | if (Call.paramHasAttr(ArgNo, Kind: Attribute::NonNull) || |
| 4690 | (HasDereferenceable && |
| 4691 | !NullPointerIsDefined(F: Call.getFunction(), |
| 4692 | AS: V->getType()->getPointerAddressSpace()))) { |
| 4693 | if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) { |
| 4694 | replaceOperand(I&: Call, OpNum: ArgNo, V: Res); |
| 4695 | Changed = true; |
| 4696 | } |
| 4697 | } else if (isKnownNonZero(V, |
| 4698 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
| 4699 | ArgNos.push_back(Elt: ArgNo); |
| 4700 | } |
| 4701 | } |
| 4702 | ArgNo++; |
| 4703 | } |
| 4704 | |
| 4705 | assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly." ); |
| 4706 | |
| 4707 | if (!ArgNos.empty()) { |
| 4708 | AttributeList AS = Call.getAttributes(); |
| 4709 | LLVMContext &Ctx = Call.getContext(); |
| 4710 | AS = AS.addParamAttribute(C&: Ctx, ArgNos, |
| 4711 | A: Attribute::get(Context&: Ctx, Kind: Attribute::NonNull)); |
| 4712 | Call.setAttributes(AS); |
| 4713 | Changed = true; |
| 4714 | } |
| 4715 | |
| 4716 | // If the callee is a pointer to a function, attempt to move any casts to the |
| 4717 | // arguments of the call/callbr/invoke. |
| 4718 | Value *Callee = Call.getCalledOperand(); |
| 4719 | Function *CalleeF = dyn_cast<Function>(Val: Callee); |
| 4720 | if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) && |
| 4721 | transformConstExprCastCall(Call)) |
| 4722 | return nullptr; |
| 4723 | |
| 4724 | if (CalleeF) { |
| 4725 | // Remove the convergent attr on calls when the callee is not convergent. |
| 4726 | if (Call.isConvergent() && !CalleeF->isConvergent() && |
| 4727 | !CalleeF->isIntrinsic()) { |
| 4728 | LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call |
| 4729 | << "\n" ); |
| 4730 | Call.setNotConvergent(); |
| 4731 | return &Call; |
| 4732 | } |
| 4733 | |
| 4734 | // If the call and callee calling conventions don't match, and neither one |
| 4735 | // of the calling conventions is compatible with C calling convention |
| 4736 | // this call must be unreachable, as the call is undefined. |
| 4737 | if ((CalleeF->getCallingConv() != Call.getCallingConv() && |
| 4738 | !(CalleeF->getCallingConv() == llvm::CallingConv::C && |
| 4739 | TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) && |
| 4740 | !(Call.getCallingConv() == llvm::CallingConv::C && |
| 4741 | TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) && |
| 4742 | // Only do this for calls to a function with a body. A prototype may |
| 4743 | // not actually end up matching the implementation's calling conv for a |
| 4744 | // variety of reasons (e.g. it may be written in assembly). |
| 4745 | !CalleeF->isDeclaration()) { |
| 4746 | Instruction *OldCall = &Call; |
| 4747 | CreateNonTerminatorUnreachable(InsertAt: OldCall); |
| 4748 | // If OldCall does not return void then replaceInstUsesWith poison. |
| 4749 | // This allows ValueHandlers and custom metadata to adjust itself. |
| 4750 | if (!OldCall->getType()->isVoidTy()) |
| 4751 | replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType())); |
| 4752 | if (isa<CallInst>(Val: OldCall)) |
| 4753 | return eraseInstFromFunction(I&: *OldCall); |
| 4754 | |
| 4755 | // We cannot remove an invoke or a callbr, because it would change thexi |
| 4756 | // CFG, just change the callee to a null pointer. |
| 4757 | cast<CallBase>(Val: OldCall)->setCalledFunction( |
| 4758 | FTy: CalleeF->getFunctionType(), |
| 4759 | Fn: Constant::getNullValue(Ty: CalleeF->getType())); |
| 4760 | return nullptr; |
| 4761 | } |
| 4762 | } |
| 4763 | |
| 4764 | // Calling a null function pointer is undefined if a null address isn't |
| 4765 | // dereferenceable. |
| 4766 | if ((isa<ConstantPointerNull>(Val: Callee) && |
| 4767 | !NullPointerIsDefined(F: Call.getFunction())) || |
| 4768 | isa<UndefValue>(Val: Callee)) { |
| 4769 | // If Call does not return void then replaceInstUsesWith poison. |
| 4770 | // This allows ValueHandlers and custom metadata to adjust itself. |
| 4771 | if (!Call.getType()->isVoidTy()) |
| 4772 | replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType())); |
| 4773 | |
| 4774 | if (Call.isTerminator()) { |
| 4775 | // Can't remove an invoke or callbr because we cannot change the CFG. |
| 4776 | return nullptr; |
| 4777 | } |
| 4778 | |
| 4779 | // This instruction is not reachable, just remove it. |
| 4780 | CreateNonTerminatorUnreachable(InsertAt: &Call); |
| 4781 | return eraseInstFromFunction(I&: Call); |
| 4782 | } |
| 4783 | |
| 4784 | if (IntrinsicInst *II = findInitTrampoline(Callee)) |
| 4785 | return transformCallThroughTrampoline(Call, Tramp&: *II); |
| 4786 | |
| 4787 | // Combine calls involving pointer authentication intrinsics. |
| 4788 | if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call)) |
| 4789 | return NewCall; |
| 4790 | |
| 4791 | // Combine calls to ptrauth constants. |
| 4792 | if (Instruction *NewCall = foldPtrAuthConstantCallee(Call)) |
| 4793 | return NewCall; |
| 4794 | |
| 4795 | if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) { |
| 4796 | InlineAsm *IA = cast<InlineAsm>(Val: Callee); |
| 4797 | if (!IA->canThrow()) { |
| 4798 | // Normal inline asm calls cannot throw - mark them |
| 4799 | // 'nounwind'. |
| 4800 | Call.setDoesNotThrow(); |
| 4801 | Changed = true; |
| 4802 | } |
| 4803 | } |
| 4804 | |
| 4805 | // Try to optimize the call if possible, we require DataLayout for most of |
| 4806 | // this. None of these calls are seen as possibly dead so go ahead and |
| 4807 | // delete the instruction now. |
| 4808 | if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) { |
| 4809 | Instruction *I = tryOptimizeCall(CI); |
| 4810 | // If we changed something return the result, etc. Otherwise let |
| 4811 | // the fallthrough check. |
| 4812 | if (I) return eraseInstFromFunction(I&: *I); |
| 4813 | } |
| 4814 | |
| 4815 | if (!Call.use_empty() && !Call.isMustTailCall()) |
| 4816 | if (Value *ReturnedArg = Call.getReturnedArgOperand()) { |
| 4817 | Type *CallTy = Call.getType(); |
| 4818 | Type *RetArgTy = ReturnedArg->getType(); |
| 4819 | if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy)) |
| 4820 | return replaceInstUsesWith( |
| 4821 | I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy)); |
| 4822 | } |
| 4823 | |
| 4824 | // Drop unnecessary callee_type metadata from calls that were converted |
| 4825 | // into direct calls. |
| 4826 | if (Call.getMetadata(KindID: LLVMContext::MD_callee_type) && !Call.isIndirectCall()) { |
| 4827 | Call.setMetadata(KindID: LLVMContext::MD_callee_type, Node: nullptr); |
| 4828 | Changed = true; |
| 4829 | } |
| 4830 | |
| 4831 | // Drop unnecessary kcfi operand bundles from calls that were converted |
| 4832 | // into direct calls. |
| 4833 | auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi); |
| 4834 | if (Bundle && !Call.isIndirectCall()) { |
| 4835 | DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi" , { |
| 4836 | if (CalleeF) { |
| 4837 | ConstantInt *FunctionType = nullptr; |
| 4838 | ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]); |
| 4839 | |
| 4840 | if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type)) |
| 4841 | FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0)); |
| 4842 | |
| 4843 | if (FunctionType && |
| 4844 | FunctionType->getZExtValue() != ExpectedType->getZExtValue()) |
| 4845 | dbgs() << Call.getModule()->getName() |
| 4846 | << ": warning: kcfi: " << Call.getCaller()->getName() |
| 4847 | << ": call to " << CalleeF->getName() |
| 4848 | << " using a mismatching function pointer type\n" ; |
| 4849 | } |
| 4850 | }); |
| 4851 | |
| 4852 | return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi); |
| 4853 | } |
| 4854 | |
| 4855 | if (isRemovableAlloc(V: &Call, TLI: &TLI)) |
| 4856 | return visitAllocSite(FI&: Call); |
| 4857 | |
| 4858 | // Handle intrinsics which can be used in both call and invoke context. |
| 4859 | switch (Call.getIntrinsicID()) { |
| 4860 | case Intrinsic::experimental_gc_statepoint: { |
| 4861 | GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call); |
| 4862 | SmallPtrSet<Value *, 32> LiveGcValues; |
| 4863 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
| 4864 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
| 4865 | |
| 4866 | // Remove the relocation if unused. |
| 4867 | if (GCR.use_empty()) { |
| 4868 | eraseInstFromFunction(I&: GCR); |
| 4869 | continue; |
| 4870 | } |
| 4871 | |
| 4872 | Value *DerivedPtr = GCR.getDerivedPtr(); |
| 4873 | Value *BasePtr = GCR.getBasePtr(); |
| 4874 | |
| 4875 | // Undef is undef, even after relocation. |
| 4876 | if (isa<UndefValue>(Val: DerivedPtr) || isa<UndefValue>(Val: BasePtr)) { |
| 4877 | replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType())); |
| 4878 | eraseInstFromFunction(I&: GCR); |
| 4879 | continue; |
| 4880 | } |
| 4881 | |
| 4882 | if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) { |
| 4883 | // The relocation of null will be null for most any collector. |
| 4884 | // TODO: provide a hook for this in GCStrategy. There might be some |
| 4885 | // weird collector this property does not hold for. |
| 4886 | if (isa<ConstantPointerNull>(Val: DerivedPtr)) { |
| 4887 | // Use null-pointer of gc_relocate's type to replace it. |
| 4888 | replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT)); |
| 4889 | eraseInstFromFunction(I&: GCR); |
| 4890 | continue; |
| 4891 | } |
| 4892 | |
| 4893 | // isKnownNonNull -> nonnull attribute |
| 4894 | if (!GCR.hasRetAttr(Kind: Attribute::NonNull) && |
| 4895 | isKnownNonZero(V: DerivedPtr, |
| 4896 | Q: getSimplifyQuery().getWithInstruction(I: &Call))) { |
| 4897 | GCR.addRetAttr(Kind: Attribute::NonNull); |
| 4898 | // We discovered new fact, re-check users. |
| 4899 | Worklist.pushUsersToWorkList(I&: GCR); |
| 4900 | } |
| 4901 | } |
| 4902 | |
| 4903 | // If we have two copies of the same pointer in the statepoint argument |
| 4904 | // list, canonicalize to one. This may let us common gc.relocates. |
| 4905 | if (GCR.getBasePtr() == GCR.getDerivedPtr() && |
| 4906 | GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { |
| 4907 | auto *OpIntTy = GCR.getOperand(i_nocapture: 2)->getType(); |
| 4908 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex())); |
| 4909 | } |
| 4910 | |
| 4911 | // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) |
| 4912 | // Canonicalize on the type from the uses to the defs |
| 4913 | |
| 4914 | // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...) |
| 4915 | LiveGcValues.insert(Ptr: BasePtr); |
| 4916 | LiveGcValues.insert(Ptr: DerivedPtr); |
| 4917 | } |
| 4918 | std::optional<OperandBundleUse> Bundle = |
| 4919 | GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live); |
| 4920 | unsigned NumOfGCLives = LiveGcValues.size(); |
| 4921 | if (!Bundle || NumOfGCLives == Bundle->Inputs.size()) |
| 4922 | break; |
| 4923 | // We can reduce the size of gc live bundle. |
| 4924 | DenseMap<Value *, unsigned> Val2Idx; |
| 4925 | std::vector<Value *> NewLiveGc; |
| 4926 | for (Value *V : Bundle->Inputs) { |
| 4927 | auto [It, Inserted] = Val2Idx.try_emplace(Key: V); |
| 4928 | if (!Inserted) |
| 4929 | continue; |
| 4930 | if (LiveGcValues.count(Ptr: V)) { |
| 4931 | It->second = NewLiveGc.size(); |
| 4932 | NewLiveGc.push_back(x: V); |
| 4933 | } else |
| 4934 | It->second = NumOfGCLives; |
| 4935 | } |
| 4936 | // Update all gc.relocates |
| 4937 | for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { |
| 4938 | GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc); |
| 4939 | Value *BasePtr = GCR.getBasePtr(); |
| 4940 | assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives && |
| 4941 | "Missed live gc for base pointer" ); |
| 4942 | auto *OpIntTy1 = GCR.getOperand(i_nocapture: 1)->getType(); |
| 4943 | GCR.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx[BasePtr])); |
| 4944 | Value *DerivedPtr = GCR.getDerivedPtr(); |
| 4945 | assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives && |
| 4946 | "Missed live gc for derived pointer" ); |
| 4947 | auto *OpIntTy2 = GCR.getOperand(i_nocapture: 2)->getType(); |
| 4948 | GCR.setOperand(i_nocapture: 2, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx[DerivedPtr])); |
| 4949 | } |
| 4950 | // Create new statepoint instruction. |
| 4951 | OperandBundleDef NewBundle("gc-live" , std::move(NewLiveGc)); |
| 4952 | return CallBase::Create(CB: &Call, Bundle: NewBundle); |
| 4953 | } |
| 4954 | default: { break; } |
| 4955 | } |
| 4956 | |
| 4957 | return Changed ? &Call : nullptr; |
| 4958 | } |
| 4959 | |
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/invoke, turning it into a direct call whose
/// arguments (and return value) are cast to the callee's actual types.
/// Returns true if the call was rewritten (the original is erased).
/// CallBrInst is not supported.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  // Strip pointer casts off the called operand; we only act when a real
  // Function definition is underneath.
  auto *Callee =
      dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
  if (!Callee)
    return false;

  assert(!isa<CallBrInst>(Call) &&
         "CallBr's don't have a single point after a def to insert at" );

  // Don't perform the transform for declarations, which may not be fully
  // accurate. For example, void @foo() is commonly used as a placeholder for
  // unknown prototypes.
  if (Callee->isDeclaration())
    return false;

  // If this is a call to a thunk function, don't remove the cast. Thunks are
  // used to transparently forward all incoming parameters and outgoing return
  // values, so it's important to leave the cast in place.
  if (Callee->hasFnAttribute(Kind: "thunk" ))
    return false;

  // If this is a call to a naked function, the assembly might be
  // using an argument, or otherwise rely on the frame layout,
  // the function prototype will mismatch.
  if (Callee->hasFnAttribute(Kind: Attribute::Naked))
    return false;

  // If this is a musttail call, the callee's prototype must match the caller's
  // prototype with the exception of pointee types. The code below doesn't
  // implement that, so we can't do this transform.
  // TODO: Do the transform if it only requires adding pointer casts.
  if (Call.isMustTailCall())
    return false;

  Instruction *Caller = &Call;
  const AttributeList &CallerPAL = Call.getAttributes();

  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    // If the new return type can't be losslessly cast back to the type the
    // callers expect, we can only proceed when the result is unused.
    if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
      if (!Caller->use_empty())
        return false; // Cannot transform this return value.
    }

    // Return attributes (e.g. zeroext/noalias) may be invalid on the new
    // return type; bail out rather than silently keep incompatible ones.
    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
      if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(
              Ty: NewRetTy, AS: CallerPAL.getRetAttrs())))
        return false; // Attribute not compatible with transformed value.
    }

    // If the callbase is an invoke instruction, and the return value is
    // used by a PHI node in a successor, we cannot change the return type of
    // the call because there is no place to put the cast instruction (without
    // breaking the critical edge). Bail out in this case.
    if (!Caller->use_empty()) {
      BasicBlock *PhisNotSupportedBlock = nullptr;
      if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
        PhisNotSupportedBlock = II->getNormalDest();
      if (PhisNotSupportedBlock)
        for (User *U : Caller->users())
          if (PHINode *PN = dyn_cast<PHINode>(Val: U))
            if (PN->getParent() == PhisNotSupportedBlock)
              return false;
    }
  }

  // Arguments present in both the call and the callee's prototype; extra
  // call args are either vararg-promoted or (for missing ones) null-filled.
  unsigned NumActualArgs = Call.arg_size();
  unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);

  // Prevent us turning:
  // declare void @takes_i32_inalloca(i32* inalloca)
  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  //
  // into:
  //  call void @takes_i32_inalloca(i32* null)
  //
  // Similarly, avoid folding away bitcasts of byval calls.
  if (Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca) ||
      Callee->getAttributes().hasAttrSomewhere(Kind: Attribute::Preallocated))
    return false;

  // First pass: verify every common argument is convertible and its
  // attributes survive the type change; no IR is modified yet.
  auto AI = Call.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
      return false; // Cannot transform this parameter value.

    // Check if there are any incompatible attributes we cannot drop safely.
    if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
            .overlaps(AM: AttributeFuncs::typeIncompatible(
                Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i),
                ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
      return false; // Attribute not compatible with transformed value.

    if (Call.isInAllocaArgument(ArgNo: i) ||
        CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::Preallocated))
      return false; // Cannot transform to and from inalloca/preallocated.

    // swifterror arguments have special register-pinning semantics; don't
    // touch calls that use them.
    if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::SwiftError))
      return false;

    if (CallerPAL.hasParamAttr(ArgNo: i, Kind: Attribute::ByVal) !=
        Callee->getAttributes().hasParamAttr(ArgNo: i, Kind: Attribute::ByVal))
      return false; // Cannot transform to or from byval.
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them. Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    unsigned SRetIdx;
    if (CallerPAL.hasAttrSomewhere(Kind: Attribute::StructRet, Index: &SRetIdx) &&
        SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
      return false;
  }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  SmallVector<Value *, 8> Args;
  SmallVector<AttributeSet, 8> ArgAttrs;
  Args.reserve(N: NumActualArgs);
  ArgAttrs.reserve(N: NumActualArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes. Wipe out any problematic attributes.
  RAttrs.remove(
      AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy, AS: CallerPAL.getRetAttrs()));

  // Second pass: build the new argument list, casting values and filtering
  // attributes as needed (all incompatible ones were proven safe to drop).
  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    Value *NewArg = *AI;
    if ((*AI)->getType() != ParamTy)
      NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
    Args.push_back(Elt: NewArg);

    // Add any parameter attributes except the ones incompatible with the new
    // type. Note that we made sure all incompatible ones are safe to drop.
    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
        Ty: ParamTy, AS: CallerPAL.getParamAttrs(ArgNo: i), ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
    ArgAttrs.push_back(
        Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
    Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
    ArgAttrs.push_back(Elt: AttributeSet());
  }

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType(Ty: (*AI)->getType());
        Value *NewArg = *AI;
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
              CastInst::getCastOpcode(Val: *AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false);
          NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
        }
        Args.push_back(Elt: NewArg);

        // Add any parameter attributes.
        ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttrs();

  if (NewRetTy->isVoidTy())
    Caller->setName("" ); // Void type should not have a name.

  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
         "missing argument attributes" );
  AttributeList NewCallerPAL = AttributeList::get(
      C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);

  SmallVector<OperandBundleDef, 1> OpBundles;
  Call.getOperandBundlesAsDefs(Defs&: OpBundles);

  // Create the replacement call/invoke, mirroring the original's kind,
  // tail-call kind, calling convention and operand bundles.
  CallBase *NewCall;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
    NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
                                   UnwindDest: II->getUnwindDest(), Args, OpBundles);
  } else {
    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
    cast<CallInst>(Val: NewCall)->setTailCallKind(
        cast<CallInst>(Val: Caller)->getTailCallKind());
  }
  NewCall->takeName(V: Caller);
  NewCall->setCallingConv(Call.getCallingConv());
  NewCall->setAttributes(NewCallerPAL);

  // Preserve prof metadata if any.
  NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});

  // Insert a cast of the return type as necessary.
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    assert(!NV->getType()->isVoidTy());
    NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
    NC->setDebugLoc(Caller->getDebugLoc());

    auto OptInsertPt = NewCall->getInsertionPointAfterDef();
    assert(OptInsertPt && "No place to insert cast" );
    InsertNewInstBefore(New: NC, Old: *OptInsertPt);
    Worklist.pushUsersToWorkList(I&: *Caller);
  }

  if (!Caller->use_empty())
    replaceInstUsesWith(I&: *Caller, V: NV);
  else if (Caller->hasValueHandle()) {
    // No uses, but ValueHandles may still be tracking the old instruction;
    // notify them so they don't dangle when we erase it below.
    if (OldRetTy == NV->getType())
      ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
    else
      // We cannot call ValueIsRAUWd with a different type, and the
      // actual tracked value will disappear.
      ValueHandleBase::ValueIsDeleted(V: Caller);
  }

  eraseInstFromFunction(I&: *Caller);
  return true;
}
| 5213 | |
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
///
/// If the underlying function has a 'nest' parameter, the trampoline's chain
/// value (init_trampoline operand 2) is spliced into the argument list at the
/// nest position; otherwise the called function is simply replaced. Returns
/// the replacement instruction, or nullptr if the transform is not possible.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {
  FunctionType *FTy = Call.getFunctionType();
  AttributeList Attrs = Call.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Kind: Attribute::Nest))
    return nullptr;

  // Operand 1 of init_trampoline is the real callee the trampoline forwards
  // to; operand 2 (used below) is the chain ('nest') value.
  Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: 1)->stripPointerCasts());
  FunctionType *NestFTy = NestF->getFunctionType();

  AttributeList NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestArgNo = 0;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
                                      E = NestFTy->param_end();
         I != E; ++NestArgNo, ++I) {
      AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
      if (AS.hasAttribute(Kind: Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = AS;
        break;
      }
    }

    if (NestTy) {
      std::vector<Value*> NewArgs;
      std::vector<AttributeSet> NewArgAttrs;
      NewArgs.reserve(n: Call.arg_size() + 1);
      NewArgAttrs.reserve(n: Call.arg_size());

      // Insert the nest argument into the call argument list, which may
      // mean appending it. Likewise for attributes.

      {
        unsigned ArgNo = 0;
        auto I = Call.arg_begin(), E = Call.arg_end();
        // do/while (not for) so the chain can be appended even when
        // NestArgNo == number of original arguments (I == E on entry).
        do {
          if (ArgNo == NestArgNo) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp.getArgOperand(i: 2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest" );
            NewArgs.push_back(x: NestVal);
            NewArgAttrs.push_back(x: NestAttr);
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(x: *I);
          NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));

          ++ArgNo;
          ++I;
        } while (true);
      }

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(n: FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it. Same do/while shape as the argument splice above.
      {
        unsigned ArgNo = 0;
        FunctionType::param_iterator I = FTy->param_begin(),
                                     E = FTy->param_end();

        do {
          if (ArgNo == NestArgNo)
            // Add the chain's type.
            NewTypes.push_back(x: NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(x: *I);

          ++ArgNo;
          ++I;
        } while (true);
      }

      // Replace the trampoline call with a direct call. Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy =
          FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
      AttributeList NewPAL =
          AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
                             RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);

      SmallVector<OperandBundleDef, 1> OpBundles;
      Call.getOperandBundlesAsDefs(Defs&: OpBundles);

      // Build the replacement with the same instruction kind (call / invoke /
      // callbr), preserving calling convention, attributes, bundles, and for
      // plain calls the tail-call kind.
      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
        NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
                                       IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
        cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
        NewCaller =
            CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
                               IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
        cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
        cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
        cast<CallInst>(Val: NewCaller)->setTailCallKind(
            cast<CallInst>(Val&: Call).getTailCallKind());
        cast<CallInst>(Val: NewCaller)->setCallingConv(
            cast<CallInst>(Val&: Call).getCallingConv());
        cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
      }
      NewCaller->setDebugLoc(Call.getDebugLoc());

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call. Since there is no 'nest'
  // parameter, there is no need to adjust the argument list. Let the generic
  // code sort out any function type mismatches.
  Call.setCalledFunction(FTy, Fn: NestF);
  return &Call;
}
| 5356 | |