| 1 | //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains logic for simplifying instructions based on information |
| 10 | // about how they are used. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "InstCombineInternal.h" |
| 15 | #include "llvm/Analysis/ValueTracking.h" |
| 16 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
| 17 | #include "llvm/IR/IntrinsicInst.h" |
| 18 | #include "llvm/IR/PatternMatch.h" |
| 19 | #include "llvm/IR/ProfDataUtils.h" |
| 20 | #include "llvm/Support/KnownBits.h" |
| 21 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
| 22 | |
| 23 | using namespace llvm; |
| 24 | using namespace llvm::PatternMatch; |
| 25 | |
| 26 | #define DEBUG_TYPE "instcombine" |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | VerifyKnownBits("instcombine-verify-known-bits" , |
| 30 | cl::desc("Verify that computeKnownBits() and " |
| 31 | "SimplifyDemandedBits() are consistent" ), |
| 32 | cl::Hidden, cl::init(Val: false)); |
| 33 | |
| 34 | static cl::opt<unsigned> SimplifyDemandedVectorEltsDepthLimit( |
| 35 | "instcombine-simplify-vector-elts-depth" , |
| 36 | cl::desc( |
| 37 | "Depth limit when simplifying vector instructions and their operands" ), |
| 38 | cl::Hidden, cl::init(Val: 10)); |
| 39 | |
| 40 | /// Check to see if the specified operand of the specified instruction is a |
| 41 | /// constant integer. If so, check to see if there are any bits set in the |
| 42 | /// constant that are not demanded. If so, shrink the constant and return true. |
| 43 | static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, |
| 44 | const APInt &Demanded) { |
| 45 | assert(I && "No instruction?" ); |
| 46 | assert(OpNo < I->getNumOperands() && "Operand index too large" ); |
| 47 | |
| 48 | // The operand must be a constant integer or splat integer. |
| 49 | Value *Op = I->getOperand(i: OpNo); |
| 50 | const APInt *C; |
| 51 | if (!match(V: Op, P: m_APInt(Res&: C))) |
| 52 | return false; |
| 53 | |
| 54 | // If there are no bits set that aren't demanded, nothing to do. |
| 55 | if (C->isSubsetOf(RHS: Demanded)) |
| 56 | return false; |
| 57 | |
| 58 | // This instruction is producing bits that are not demanded. Shrink the RHS. |
| 59 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: Op->getType(), V: *C & Demanded)); |
| 60 | |
| 61 | return true; |
| 62 | } |
| 63 | |
| 64 | /// Let N = 2 * M. |
| 65 | /// Given an N-bit integer representing a pack of two M-bit integers, |
| 66 | /// we can select one of the packed integers by right-shifting by either |
| 67 | /// zero or M (which is the most straightforward to check if M is a power |
| 68 | /// of 2), and then isolating the lower M bits. In this case, we can |
| 69 | /// represent the shift as a select on whether the shr amount is nonzero. |
| 70 | static Value *simplifyShiftSelectingPackedElement(Instruction *I, |
| 71 | const APInt &DemandedMask, |
| 72 | InstCombinerImpl &IC, |
| 73 | unsigned Depth) { |
| 74 | assert(I->getOpcode() == Instruction::LShr && |
| 75 | "Only lshr instruction supported" ); |
| 76 | |
| 77 | uint64_t ShlAmt; |
| 78 | Value *Upper, *Lower; |
| 79 | if (!match(V: I->getOperand(i: 0), |
| 80 | P: m_OneUse(SubPattern: m_c_DisjointOr( |
| 81 | L: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: Upper), R: m_ConstantInt(V&: ShlAmt))), |
| 82 | R: m_Value(V&: Lower))))) |
| 83 | return nullptr; |
| 84 | |
| 85 | if (!isPowerOf2_64(Value: ShlAmt)) |
| 86 | return nullptr; |
| 87 | |
| 88 | const uint64_t DemandedBitWidth = DemandedMask.getActiveBits(); |
| 89 | if (DemandedBitWidth > ShlAmt) |
| 90 | return nullptr; |
| 91 | |
| 92 | // Check that upper demanded bits are not lost from lshift. |
| 93 | if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth) |
| 94 | return nullptr; |
| 95 | |
| 96 | KnownBits KnownLowerBits = IC.computeKnownBits(V: Lower, CxtI: I, Depth); |
| 97 | if (!KnownLowerBits.getMaxValue().isIntN(N: ShlAmt)) |
| 98 | return nullptr; |
| 99 | |
| 100 | Value *ShrAmt = I->getOperand(i: 1); |
| 101 | KnownBits KnownShrBits = IC.computeKnownBits(V: ShrAmt, CxtI: I, Depth); |
| 102 | |
| 103 | // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or |
| 104 | // zero. |
| 105 | if (~KnownShrBits.Zero != ShlAmt) |
| 106 | return nullptr; |
| 107 | |
| 108 | IRBuilderBase::InsertPointGuard Guard(IC.Builder); |
| 109 | IC.Builder.SetInsertPoint(I); |
| 110 | Value *ShrAmtZ = |
| 111 | IC.Builder.CreateICmpEQ(LHS: ShrAmt, RHS: Constant::getNullValue(Ty: ShrAmt->getType()), |
| 112 | Name: ShrAmt->getName() + ".z" ); |
| 113 | // There is no existing !prof metadata we can derive the !prof metadata for |
| 114 | // this select. |
| 115 | Value *Select = IC.Builder.CreateSelectWithUnknownProfile(C: ShrAmtZ, True: Lower, |
| 116 | False: Upper, DEBUG_TYPE); |
| 117 | Select->takeName(V: I); |
| 118 | return Select; |
| 119 | } |
| 120 | |
| 121 | /// Returns the bitwidth of the given scalar or pointer type. For vector types, |
| 122 | /// returns the element type's bitwidth. |
| 123 | static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { |
| 124 | if (unsigned BitWidth = Ty->getScalarSizeInBits()) |
| 125 | return BitWidth; |
| 126 | |
| 127 | return DL.getPointerTypeSizeInBits(Ty); |
| 128 | } |
| 129 | |
| 130 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
| 131 | /// the instruction has any properties that allow us to simplify its operands. |
| 132 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, |
| 133 | KnownBits &Known) { |
| 134 | APInt DemandedMask(APInt::getAllOnes(numBits: Known.getBitWidth())); |
| 135 | Value *V = SimplifyDemandedUseBits(I: &Inst, DemandedMask, Known, |
| 136 | Q: SQ.getWithInstruction(I: &Inst)); |
| 137 | if (!V) return false; |
| 138 | if (V == &Inst) return true; |
| 139 | replaceInstUsesWith(I&: Inst, V); |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if |
| 144 | /// the instruction has any properties that allow us to simplify its operands. |
| 145 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { |
| 146 | KnownBits Known(getBitWidth(Ty: Inst.getType(), DL)); |
| 147 | return SimplifyDemandedInstructionBits(Inst, Known); |
| 148 | } |
| 149 | |
| 150 | bool InstCombinerImpl::SimplifyDemandedInstructionFPClass(Instruction &Inst) { |
| 151 | KnownFPClass Known; |
| 152 | |
| 153 | Value *V = |
| 154 | SimplifyDemandedUseFPClass(I: &Inst, DemandedMask: fcAllFlags, Known, /*CtxI=*/CxtI: &Inst); |
| 155 | if (!V) |
| 156 | return false; |
| 157 | if (V == &Inst) |
| 158 | return true; |
| 159 | replaceInstUsesWith(I&: Inst, V); |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | /// This form of SimplifyDemandedBits simplifies the specified instruction |
| 164 | /// operand if possible, updating it in place. It returns true if it made any |
| 165 | /// change and false otherwise. |
| 166 | bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, |
| 167 | const APInt &DemandedMask, |
| 168 | KnownBits &Known, |
| 169 | const SimplifyQuery &Q, |
| 170 | unsigned Depth) { |
| 171 | Use &U = I->getOperandUse(i: OpNo); |
| 172 | Value *V = U.get(); |
| 173 | if (isa<Constant>(Val: V)) { |
| 174 | llvm::computeKnownBits(V, Known, Q, Depth); |
| 175 | return false; |
| 176 | } |
| 177 | |
| 178 | Known.resetAll(); |
| 179 | if (DemandedMask.isZero()) { |
| 180 | // Not demanding any bits from V. |
| 181 | replaceUse(U, NewValue: UndefValue::get(T: V->getType())); |
| 182 | return true; |
| 183 | } |
| 184 | |
| 185 | Instruction *VInst = dyn_cast<Instruction>(Val: V); |
| 186 | if (!VInst) { |
| 187 | llvm::computeKnownBits(V, Known, Q, Depth); |
| 188 | return false; |
| 189 | } |
| 190 | |
| 191 | if (Depth == MaxAnalysisRecursionDepth) |
| 192 | return false; |
| 193 | |
| 194 | Value *NewVal; |
| 195 | if (VInst->hasOneUse()) { |
| 196 | // If the instruction has one use, we can directly simplify it. |
| 197 | NewVal = SimplifyDemandedUseBits(I: VInst, DemandedMask, Known, Q, Depth); |
| 198 | } else { |
| 199 | // If there are multiple uses of this instruction, then we can simplify |
| 200 | // VInst to some other value, but not modify the instruction. |
| 201 | NewVal = |
| 202 | SimplifyMultipleUseDemandedBits(I: VInst, DemandedMask, Known, Q, Depth); |
| 203 | } |
| 204 | if (!NewVal) return false; |
| 205 | if (Instruction* OpInst = dyn_cast<Instruction>(Val&: U)) |
| 206 | salvageDebugInfo(I&: *OpInst); |
| 207 | |
| 208 | replaceUse(U, NewValue: NewVal); |
| 209 | return true; |
| 210 | } |
| 211 | |
| 212 | /// This function attempts to replace V with a simpler value based on the |
| 213 | /// demanded bits. When this function is called, it is known that only the bits |
| 214 | /// set in DemandedMask of the result of V are ever used downstream. |
| 215 | /// Consequently, depending on the mask and V, it may be possible to replace V |
| 216 | /// with a constant or one of its operands. In such cases, this function does |
| 217 | /// the replacement and returns true. In all other cases, it returns false after |
| 218 | /// analyzing the expression and setting KnownOne and known to be one in the |
| 219 | /// expression. Known.Zero contains all the bits that are known to be zero in |
| 220 | /// the expression. These are provided to potentially allow the caller (which |
| 221 | /// might recursively be SimplifyDemandedBits itself) to simplify the |
| 222 | /// expression. |
| 223 | /// Known.One and Known.Zero always follow the invariant that: |
| 224 | /// Known.One & Known.Zero == 0. |
| 225 | /// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero |
| 226 | /// are accurate even for bits not in DemandedMask. Note |
| 227 | /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all |
| 228 | /// be the same. |
| 229 | /// |
| 230 | /// This returns null if it did not change anything and it permits no |
| 231 | /// simplification. This returns V itself if it did some simplification of V's |
| 232 | /// operands based on the information about what bits are demanded. This returns |
| 233 | /// some other non-null value if it found out that V is equal to another value |
| 234 | /// in the context where the specified bits are demanded, but not for all users. |
| 235 | Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, |
| 236 | const APInt &DemandedMask, |
| 237 | KnownBits &Known, |
| 238 | const SimplifyQuery &Q, |
| 239 | unsigned Depth) { |
| 240 | assert(I != nullptr && "Null pointer of Value???" ); |
| 241 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" ); |
| 242 | uint32_t BitWidth = DemandedMask.getBitWidth(); |
| 243 | Type *VTy = I->getType(); |
| 244 | assert( |
| 245 | (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && |
| 246 | Known.getBitWidth() == BitWidth && |
| 247 | "Value *V, DemandedMask and Known must have same BitWidth" ); |
| 248 | |
| 249 | KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); |
| 250 | |
| 251 | // Update flags after simplifying an operand based on the fact that some high |
| 252 | // order bits are not demanded. |
| 253 | auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I, |
| 254 | unsigned NLZ) { |
| 255 | if (NLZ > 0) { |
| 256 | // Disable the nsw and nuw flags here: We can no longer guarantee that |
| 257 | // we won't wrap after simplification. Removing the nsw/nuw flags is |
| 258 | // legal here because the top bit is not demanded. |
| 259 | I->setHasNoSignedWrap(false); |
| 260 | I->setHasNoUnsignedWrap(false); |
| 261 | } |
| 262 | return I; |
| 263 | }; |
| 264 | |
| 265 | // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care |
| 266 | // about the high bits of the operands. |
| 267 | auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { |
| 268 | unsigned NLZ = DemandedMask.countl_zero(); |
| 269 | // Right fill the mask of bits for the operands to demand the most |
| 270 | // significant bit and all those below it. |
| 271 | DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
| 272 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromOps) || |
| 273 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOps, Known&: LHSKnown, Q, Depth: Depth + 1) || |
| 274 | ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
| 275 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) { |
| 276 | disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 277 | return true; |
| 278 | } |
| 279 | return false; |
| 280 | }; |
| 281 | |
| 282 | switch (I->getOpcode()) { |
| 283 | default: |
| 284 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 285 | break; |
| 286 | case Instruction::And: { |
| 287 | // If either the LHS or the RHS are Zero, the result is zero. |
| 288 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || |
| 289 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.Zero, Known&: LHSKnown, Q, |
| 290 | Depth: Depth + 1)) |
| 291 | return I; |
| 292 | |
| 293 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 294 | SQ: Q, Depth); |
| 295 | |
| 296 | // If the client is only demanding bits that we know, return the known |
| 297 | // constant. |
| 298 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 299 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
| 300 | |
| 301 | // If all of the demanded bits are known 1 on one side, return the other. |
| 302 | // These bits cannot contribute to the result of the 'and'. |
| 303 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) |
| 304 | return I->getOperand(i: 0); |
| 305 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) |
| 306 | return I->getOperand(i: 1); |
| 307 | |
| 308 | // If the RHS is a constant, see if we can simplify it. |
| 309 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask & ~LHSKnown.Zero)) |
| 310 | return I; |
| 311 | |
| 312 | break; |
| 313 | } |
| 314 | case Instruction::Or: { |
| 315 | // If either the LHS or the RHS are One, the result is One. |
| 316 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || |
| 317 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.One, Known&: LHSKnown, Q, |
| 318 | Depth: Depth + 1)) { |
| 319 | // Disjoint flag may not longer hold. |
| 320 | I->dropPoisonGeneratingFlags(); |
| 321 | return I; |
| 322 | } |
| 323 | |
| 324 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 325 | SQ: Q, Depth); |
| 326 | |
| 327 | // If the client is only demanding bits that we know, return the known |
| 328 | // constant. |
| 329 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 330 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
| 331 | |
| 332 | // If all of the demanded bits are known zero on one side, return the other. |
| 333 | // These bits cannot contribute to the result of the 'or'. |
| 334 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) |
| 335 | return I->getOperand(i: 0); |
| 336 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
| 337 | return I->getOperand(i: 1); |
| 338 | |
| 339 | // If the RHS is a constant, see if we can simplify it. |
| 340 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
| 341 | return I; |
| 342 | |
| 343 | // Infer disjoint flag if no common bits are set. |
| 344 | if (!cast<PossiblyDisjointInst>(Val: I)->isDisjoint()) { |
| 345 | WithCache<const Value *> LHSCache(I->getOperand(i: 0), LHSKnown), |
| 346 | RHSCache(I->getOperand(i: 1), RHSKnown); |
| 347 | if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ: Q)) { |
| 348 | cast<PossiblyDisjointInst>(Val: I)->setIsDisjoint(true); |
| 349 | return I; |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | break; |
| 354 | } |
| 355 | case Instruction::Xor: { |
| 356 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || |
| 357 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1)) |
| 358 | return I; |
| 359 | Value *LHS, *RHS; |
| 360 | if (DemandedMask == 1 && |
| 361 | match(V: I->getOperand(i: 0), P: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: LHS))) && |
| 362 | match(V: I->getOperand(i: 1), P: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: RHS)))) { |
| 363 | // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 |
| 364 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 365 | Builder.SetInsertPoint(I); |
| 366 | auto *Xor = Builder.CreateXor(LHS, RHS); |
| 367 | return Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Xor); |
| 368 | } |
| 369 | |
| 370 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 371 | SQ: Q, Depth); |
| 372 | |
| 373 | // If the client is only demanding bits that we know, return the known |
| 374 | // constant. |
| 375 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 376 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
| 377 | |
| 378 | // If all of the demanded bits are known zero on one side, return the other. |
| 379 | // These bits cannot contribute to the result of the 'xor'. |
| 380 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) |
| 381 | return I->getOperand(i: 0); |
| 382 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) |
| 383 | return I->getOperand(i: 1); |
| 384 | |
| 385 | // If all of the demanded bits are known to be zero on one side or the |
| 386 | // other, turn this into an *inclusive* or. |
| 387 | // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 |
| 388 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.Zero)) { |
| 389 | Instruction *Or = |
| 390 | BinaryOperator::CreateOr(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1)); |
| 391 | if (DemandedMask.isAllOnes()) |
| 392 | cast<PossiblyDisjointInst>(Val: Or)->setIsDisjoint(true); |
| 393 | Or->takeName(V: I); |
| 394 | return InsertNewInstWith(New: Or, Old: I->getIterator()); |
| 395 | } |
| 396 | |
| 397 | // If all of the demanded bits on one side are known, and all of the set |
| 398 | // bits on that side are also known to be set on the other side, turn this |
| 399 | // into an AND, as we know the bits will be cleared. |
| 400 | // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 |
| 401 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero|RHSKnown.One) && |
| 402 | RHSKnown.One.isSubsetOf(RHS: LHSKnown.One)) { |
| 403 | Constant *AndC = Constant::getIntegerValue(Ty: VTy, |
| 404 | V: ~RHSKnown.One & DemandedMask); |
| 405 | Instruction *And = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
| 406 | return InsertNewInstWith(New: And, Old: I->getIterator()); |
| 407 | } |
| 408 | |
| 409 | // If the RHS is a constant, see if we can change it. Don't alter a -1 |
| 410 | // constant because that's a canonical 'not' op, and that is better for |
| 411 | // combining, SCEV, and codegen. |
| 412 | const APInt *C; |
| 413 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && !C->isAllOnes()) { |
| 414 | if ((*C | ~DemandedMask).isAllOnes()) { |
| 415 | // Force bits to 1 to create a 'not' op. |
| 416 | I->setOperand(i: 1, Val: ConstantInt::getAllOnesValue(Ty: VTy)); |
| 417 | return I; |
| 418 | } |
| 419 | // If we can't turn this into a 'not', try to shrink the constant. |
| 420 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) |
| 421 | return I; |
| 422 | } |
| 423 | |
| 424 | // If our LHS is an 'and' and if it has one use, and if any of the bits we |
| 425 | // are flipping are known to be set, then the xor is just resetting those |
| 426 | // bits to zero. We can just knock out bits from the 'and' and the 'xor', |
| 427 | // simplifying both of them. |
| 428 | if (Instruction *LHSInst = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) { |
| 429 | ConstantInt *AndRHS, *XorRHS; |
| 430 | if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && |
| 431 | match(V: I->getOperand(i: 1), P: m_ConstantInt(CI&: XorRHS)) && |
| 432 | match(V: LHSInst->getOperand(i: 1), P: m_ConstantInt(CI&: AndRHS)) && |
| 433 | (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { |
| 434 | APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); |
| 435 | |
| 436 | Constant *AndC = ConstantInt::get(Ty: VTy, V: NewMask & AndRHS->getValue()); |
| 437 | Instruction *NewAnd = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); |
| 438 | InsertNewInstWith(New: NewAnd, Old: I->getIterator()); |
| 439 | |
| 440 | Constant *XorC = ConstantInt::get(Ty: VTy, V: NewMask & XorRHS->getValue()); |
| 441 | Instruction *NewXor = BinaryOperator::CreateXor(V1: NewAnd, V2: XorC); |
| 442 | return InsertNewInstWith(New: NewXor, Old: I->getIterator()); |
| 443 | } |
| 444 | } |
| 445 | break; |
| 446 | } |
| 447 | case Instruction::Select: { |
| 448 | if (SimplifyDemandedBits(I, OpNo: 2, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || |
| 449 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1)) |
| 450 | return I; |
| 451 | |
| 452 | // If the operands are constants, see if we can simplify them. |
| 453 | // This is similar to ShrinkDemandedConstant, but for a select we want to |
| 454 | // try to keep the selected constants the same as icmp value constants, if |
| 455 | // we can. This helps not break apart (or helps put back together) |
| 456 | // canonical patterns like min and max. |
| 457 | auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, |
| 458 | const APInt &DemandedMask) { |
| 459 | const APInt *SelC; |
| 460 | if (!match(V: I->getOperand(i: OpNo), P: m_APInt(Res&: SelC))) |
| 461 | return false; |
| 462 | |
| 463 | // Get the constant out of the ICmp, if there is one. |
| 464 | // Only try this when exactly 1 operand is a constant (if both operands |
| 465 | // are constant, the icmp should eventually simplify). Otherwise, we may |
| 466 | // invert the transform that reduces set bits and infinite-loop. |
| 467 | Value *X; |
| 468 | const APInt *CmpC; |
| 469 | if (!match(V: I->getOperand(i: 0), P: m_ICmp(L: m_Value(V&: X), R: m_APInt(Res&: CmpC))) || |
| 470 | isa<Constant>(Val: X) || CmpC->getBitWidth() != SelC->getBitWidth()) |
| 471 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
| 472 | |
| 473 | // If the constant is already the same as the ICmp, leave it as-is. |
| 474 | if (*CmpC == *SelC) |
| 475 | return false; |
| 476 | // If the constants are not already the same, but can be with the demand |
| 477 | // mask, use the constant value from the ICmp. |
| 478 | if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) { |
| 479 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: I->getType(), V: *CmpC)); |
| 480 | return true; |
| 481 | } |
| 482 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); |
| 483 | }; |
| 484 | if (CanonicalizeSelectConstant(I, 1, DemandedMask) || |
| 485 | CanonicalizeSelectConstant(I, 2, DemandedMask)) |
| 486 | return I; |
| 487 | |
| 488 | // Only known if known in both the LHS and RHS. |
| 489 | adjustKnownBitsForSelectArm(Known&: LHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1), |
| 490 | /*Invert=*/false, Q, Depth); |
| 491 | adjustKnownBitsForSelectArm(Known&: RHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2), |
| 492 | /*Invert=*/true, Q, Depth); |
| 493 | Known = LHSKnown.intersectWith(RHS: RHSKnown); |
| 494 | break; |
| 495 | } |
| 496 | case Instruction::Trunc: { |
| 497 | // If we do not demand the high bits of a right-shifted and truncated value, |
| 498 | // then we may be able to truncate it before the shift. |
| 499 | Value *X; |
| 500 | const APInt *C; |
| 501 | if (match(V: I->getOperand(i: 0), P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C))))) { |
| 502 | // The shift amount must be valid (not poison) in the narrow type, and |
| 503 | // it must not be greater than the high bits demanded of the result. |
| 504 | if (C->ult(RHS: VTy->getScalarSizeInBits()) && |
| 505 | C->ule(RHS: DemandedMask.countl_zero())) { |
| 506 | // trunc (lshr X, C) --> lshr (trunc X), C |
| 507 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 508 | Builder.SetInsertPoint(I); |
| 509 | Value *Trunc = Builder.CreateTrunc(V: X, DestTy: VTy); |
| 510 | return Builder.CreateLShr(LHS: Trunc, RHS: C->getZExtValue()); |
| 511 | } |
| 512 | } |
| 513 | } |
| 514 | [[fallthrough]]; |
| 515 | case Instruction::ZExt: { |
| 516 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
| 517 | |
| 518 | APInt InputDemandedMask = DemandedMask.zextOrTrunc(width: SrcBitWidth); |
| 519 | KnownBits InputKnown(SrcBitWidth); |
| 520 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedMask, Known&: InputKnown, Q, |
| 521 | Depth: Depth + 1)) { |
| 522 | // For zext nneg, we may have dropped the instruction which made the |
| 523 | // input non-negative. |
| 524 | I->dropPoisonGeneratingFlags(); |
| 525 | return I; |
| 526 | } |
| 527 | assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?" ); |
| 528 | if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() && |
| 529 | !InputKnown.isNegative()) |
| 530 | InputKnown.makeNonNegative(); |
| 531 | Known = InputKnown.zextOrTrunc(BitWidth); |
| 532 | |
| 533 | break; |
| 534 | } |
| 535 | case Instruction::SExt: { |
| 536 | // Compute the bits in the result that are not present in the input. |
| 537 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); |
| 538 | |
| 539 | APInt InputDemandedBits = DemandedMask.trunc(width: SrcBitWidth); |
| 540 | |
| 541 | // If any of the sign extended bits are demanded, we know that the sign |
| 542 | // bit is demanded. |
| 543 | if (DemandedMask.getActiveBits() > SrcBitWidth) |
| 544 | InputDemandedBits.setBit(SrcBitWidth-1); |
| 545 | |
| 546 | KnownBits InputKnown(SrcBitWidth); |
| 547 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedBits, Known&: InputKnown, Q, Depth: Depth + 1)) |
| 548 | return I; |
| 549 | |
| 550 | // If the input sign bit is known zero, or if the NewBits are not demanded |
| 551 | // convert this into a zero extension. |
| 552 | if (InputKnown.isNonNegative() || |
| 553 | DemandedMask.getActiveBits() <= SrcBitWidth) { |
| 554 | // Convert to ZExt cast. |
| 555 | CastInst *NewCast = new ZExtInst(I->getOperand(i: 0), VTy); |
| 556 | NewCast->takeName(V: I); |
| 557 | return InsertNewInstWith(New: NewCast, Old: I->getIterator()); |
| 558 | } |
| 559 | |
| 560 | // If the sign bit of the input is known set or clear, then we know the |
| 561 | // top bits of the result. |
| 562 | Known = InputKnown.sext(BitWidth); |
| 563 | break; |
| 564 | } |
| 565 | case Instruction::Add: { |
| 566 | if ((DemandedMask & 1) == 0) { |
| 567 | // If we do not need the low bit, try to convert bool math to logic: |
| 568 | // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN |
| 569 | Value *X, *Y; |
| 570 | if (match(V: I, P: m_c_Add(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), |
| 571 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && |
| 572 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { |
| 573 | // Truth table for inputs and output signbits: |
| 574 | // X:0 | X:1 |
| 575 | // ---------- |
| 576 | // Y:0 | 0 | 0 | |
| 577 | // Y:1 | -1 | 0 | |
| 578 | // ---------- |
| 579 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 580 | Builder.SetInsertPoint(I); |
| 581 | Value *AndNot = Builder.CreateAnd(LHS: Builder.CreateNot(V: X), RHS: Y); |
| 582 | return Builder.CreateSExt(V: AndNot, DestTy: VTy); |
| 583 | } |
| 584 | |
| 585 | // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN |
| 586 | if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: X)), R: m_SExt(Op: m_Value(V&: Y)))) && |
| 587 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType() && |
| 588 | (I->getOperand(i: 0)->hasOneUse() || I->getOperand(i: 1)->hasOneUse())) { |
| 589 | |
| 590 | // Truth table for inputs and output signbits: |
| 591 | // X:0 | X:1 |
| 592 | // ----------- |
| 593 | // Y:0 | -1 | -1 | |
| 594 | // Y:1 | -1 | 0 | |
| 595 | // ----------- |
| 596 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 597 | Builder.SetInsertPoint(I); |
| 598 | Value *Or = Builder.CreateOr(LHS: X, RHS: Y); |
| 599 | return Builder.CreateSExt(V: Or, DestTy: VTy); |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | // Right fill the mask of bits for the operands to demand the most |
| 604 | // significant bit and all those below it. |
| 605 | unsigned NLZ = DemandedMask.countl_zero(); |
| 606 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
| 607 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
| 608 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) |
| 609 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 610 | |
| 611 | // If low order bits are not demanded and known to be zero in one operand, |
| 612 | // then we don't need to demand them from the other operand, since they |
| 613 | // can't cause overflow into any bits that are demanded in the result. |
| 614 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
| 615 | APInt DemandedFromLHS = DemandedFromOps; |
| 616 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
| 617 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
| 618 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1)) |
| 619 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 620 | |
| 621 | unsigned NtzLHS = (~DemandedMask & LHSKnown.Zero).countr_one(); |
| 622 | APInt DemandedFromRHS = DemandedFromOps; |
| 623 | DemandedFromRHS.clearLowBits(loBits: NtzLHS); |
| 624 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromRHS)) |
| 625 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 626 | |
| 627 | // If we are known to be adding zeros to every bit below |
| 628 | // the highest demanded bit, we just return the other side. |
| 629 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
| 630 | return I->getOperand(i: 0); |
| 631 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
| 632 | return I->getOperand(i: 1); |
| 633 | |
| 634 | // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask |
| 635 | { |
| 636 | const APInt *C; |
| 637 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && |
| 638 | C->isOneBitSet(BitNo: DemandedMask.getActiveBits() - 1)) { |
| 639 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 640 | Builder.SetInsertPoint(I); |
| 641 | return Builder.CreateXor(LHS: I->getOperand(i: 0), RHS: ConstantInt::get(Ty: VTy, V: *C)); |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | // Otherwise just compute the known bits of the result. |
| 646 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
| 647 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
| 648 | Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); |
| 649 | break; |
| 650 | } |
| 651 | case Instruction::Sub: { |
| 652 | // Right fill the mask of bits for the operands to demand the most |
| 653 | // significant bit and all those below it. |
| 654 | unsigned NLZ = DemandedMask.countl_zero(); |
| 655 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
| 656 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || |
| 657 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) |
| 658 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 659 | |
| 660 | // If low order bits are not demanded and are known to be zero in RHS, |
| 661 | // then we don't need to demand them from LHS, since they can't cause a |
| 662 | // borrow from any bits that are demanded in the result. |
| 663 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); |
| 664 | APInt DemandedFromLHS = DemandedFromOps; |
| 665 | DemandedFromLHS.clearLowBits(loBits: NTZ); |
| 666 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || |
| 667 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1)) |
| 668 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); |
| 669 | |
| 670 | // If we are known to be subtracting zeros from every bit below |
| 671 | // the highest demanded bit, we just return the other side. |
| 672 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
| 673 | return I->getOperand(i: 0); |
| 674 | // We can't do this with the LHS for subtraction, unless we are only |
| 675 | // demanding the LSB. |
| 676 | if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
| 677 | return I->getOperand(i: 1); |
| 678 | |
| 679 | // Canonicalize sub mask, X -> ~X |
| 680 | const APInt *LHSC; |
| 681 | if (match(V: I->getOperand(i: 0), P: m_LowBitMask(V&: LHSC)) && |
| 682 | DemandedFromOps.isSubsetOf(RHS: *LHSC)) { |
| 683 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 684 | Builder.SetInsertPoint(I); |
| 685 | return Builder.CreateNot(V: I->getOperand(i: 1)); |
| 686 | } |
| 687 | |
| 688 | // Otherwise just compute the known bits of the result. |
| 689 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
| 690 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
| 691 | Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); |
| 692 | break; |
| 693 | } |
| 694 | case Instruction::Mul: { |
| 695 | APInt DemandedFromOps; |
| 696 | if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) |
| 697 | return I; |
| 698 | |
| 699 | if (DemandedMask.isPowerOf2()) { |
| 700 | // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. |
| 701 | // If we demand exactly one bit N and we have "X * (C' << N)" where C' is |
| 702 | // odd (has LSB set), then the left-shifted low bit of X is the answer. |
| 703 | unsigned CTZ = DemandedMask.countr_zero(); |
| 704 | const APInt *C; |
| 705 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && C->countr_zero() == CTZ) { |
| 706 | Constant *ShiftC = ConstantInt::get(Ty: VTy, V: CTZ); |
| 707 | Instruction *Shl = BinaryOperator::CreateShl(V1: I->getOperand(i: 0), V2: ShiftC); |
| 708 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
| 709 | } |
| 710 | } |
| 711 | // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: |
| 712 | // X * X is odd iff X is odd. |
| 713 | // 'Quadratic Reciprocity': X * X -> 0 for bit[1] |
| 714 | if (I->getOperand(i: 0) == I->getOperand(i: 1) && DemandedMask.ult(RHS: 4)) { |
| 715 | Constant *One = ConstantInt::get(Ty: VTy, V: 1); |
| 716 | Instruction *And1 = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: One); |
| 717 | return InsertNewInstWith(New: And1, Old: I->getIterator()); |
| 718 | } |
| 719 | |
| 720 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 721 | break; |
| 722 | } |
| 723 | case Instruction::Shl: { |
| 724 | const APInt *SA; |
| 725 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
| 726 | const APInt *ShrAmt; |
| 727 | if (match(V: I->getOperand(i: 0), P: m_Shr(L: m_Value(), R: m_APInt(Res&: ShrAmt)))) |
| 728 | if (Instruction *Shr = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) |
| 729 | if (Value *R = simplifyShrShlDemandedBits(Shr, ShrOp1: *ShrAmt, Shl: I, ShlOp1: *SA, |
| 730 | DemandedMask, Known)) |
| 731 | return R; |
| 732 | |
| 733 | // Do not simplify if shl is part of funnel-shift pattern |
| 734 | if (I->hasOneUse()) { |
| 735 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
| 736 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
| 737 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
| 738 | auto [IID, FShiftArgs] = *Opt; |
| 739 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
| 740 | FShiftArgs[0] == FShiftArgs[1]) { |
| 741 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 742 | break; |
| 743 | } |
| 744 | } |
| 745 | } |
| 746 | } |
| 747 | |
| 748 | // We only want bits that already match the signbit then we don't |
| 749 | // need to shift. |
| 750 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth - 1); |
| 751 | if (DemandedMask.countr_zero() >= ShiftAmt) { |
| 752 | if (I->hasNoSignedWrap()) { |
| 753 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
| 754 | unsigned SignBits = |
| 755 | ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); |
| 756 | if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) |
| 757 | return I->getOperand(i: 0); |
| 758 | } |
| 759 | |
| 760 | // If we can pre-shift a right-shifted constant to the left without |
| 761 | // losing any high bits and we don't demand the low bits, then eliminate |
| 762 | // the left-shift: |
| 763 | // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X |
| 764 | Value *X; |
| 765 | Constant *C; |
| 766 | if (match(V: I->getOperand(i: 0), P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
| 767 | Constant *LeftShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
| 768 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: C, |
| 769 | RHS: LeftShiftAmtC, DL); |
| 770 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: NewC, |
| 771 | RHS: LeftShiftAmtC, DL) == C) { |
| 772 | Instruction *Lshr = BinaryOperator::CreateLShr(V1: NewC, V2: X); |
| 773 | return InsertNewInstWith(New: Lshr, Old: I->getIterator()); |
| 774 | } |
| 775 | } |
| 776 | } |
| 777 | |
| 778 | APInt DemandedMaskIn(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
| 779 | |
| 780 | // If the shift is NUW/NSW, then it does demand the high bits. |
| 781 | ShlOperator *IOp = cast<ShlOperator>(Val: I); |
| 782 | if (IOp->hasNoSignedWrap()) |
| 783 | DemandedMaskIn.setHighBits(ShiftAmt+1); |
| 784 | else if (IOp->hasNoUnsignedWrap()) |
| 785 | DemandedMaskIn.setHighBits(ShiftAmt); |
| 786 | |
| 787 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) |
| 788 | return I; |
| 789 | |
| 790 | Known = KnownBits::shl(LHS: Known, |
| 791 | RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), |
| 792 | /* NUW */ IOp->hasNoUnsignedWrap(), |
| 793 | /* NSW */ IOp->hasNoSignedWrap()); |
| 794 | } else { |
| 795 | // This is a variable shift, so we can't shift the demand mask by a known |
| 796 | // amount. But if we are not demanding high bits, then we are not |
| 797 | // demanding those bits from the pre-shifted operand either. |
| 798 | if (unsigned CTLZ = DemandedMask.countl_zero()) { |
| 799 | APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ)); |
| 800 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOp, Known, Q, Depth: Depth + 1)) { |
| 801 | // We can't guarantee that nsw/nuw hold after simplifying the operand. |
| 802 | I->dropPoisonGeneratingFlags(); |
| 803 | return I; |
| 804 | } |
| 805 | } |
| 806 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 807 | } |
| 808 | break; |
| 809 | } |
| 810 | case Instruction::LShr: { |
| 811 | const APInt *SA; |
| 812 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
| 813 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
| 814 | |
| 815 | // Do not simplify if lshr is part of funnel-shift pattern |
| 816 | if (I->hasOneUse()) { |
| 817 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); |
| 818 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { |
| 819 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { |
| 820 | auto [IID, FShiftArgs] = *Opt; |
| 821 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && |
| 822 | FShiftArgs[0] == FShiftArgs[1]) { |
| 823 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 824 | break; |
| 825 | } |
| 826 | } |
| 827 | } |
| 828 | } |
| 829 | |
| 830 | // If we are just demanding the shifted sign bit and below, then this can |
| 831 | // be treated as an ASHR in disguise. |
| 832 | if (DemandedMask.countl_zero() >= ShiftAmt) { |
| 833 | // If we only want bits that already match the signbit then we don't |
| 834 | // need to shift. |
| 835 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
| 836 | unsigned SignBits = |
| 837 | ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); |
| 838 | if (SignBits >= NumHiDemandedBits) |
| 839 | return I->getOperand(i: 0); |
| 840 | |
| 841 | // If we can pre-shift a left-shifted constant to the right without |
| 842 | // losing any low bits (we already know we don't demand the high bits), |
| 843 | // then eliminate the right-shift: |
| 844 | // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X |
| 845 | Value *X; |
| 846 | Constant *C; |
| 847 | if (match(V: I->getOperand(i: 0), P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X)))) { |
| 848 | Constant *RightShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); |
| 849 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: C, |
| 850 | RHS: RightShiftAmtC, DL); |
| 851 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: NewC, |
| 852 | RHS: RightShiftAmtC, DL) == C) { |
| 853 | Instruction *Shl = BinaryOperator::CreateShl(V1: NewC, V2: X); |
| 854 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); |
| 855 | } |
| 856 | } |
| 857 | |
| 858 | const APInt *Factor; |
| 859 | if (match(V: I->getOperand(i: 0), |
| 860 | P: m_OneUse(SubPattern: m_Mul(L: m_Value(V&: X), R: m_APInt(Res&: Factor)))) && |
| 861 | Factor->countr_zero() >= ShiftAmt) { |
| 862 | BinaryOperator *Mul = BinaryOperator::CreateMul( |
| 863 | V1: X, V2: ConstantInt::get(Ty: X->getType(), V: Factor->lshr(shiftAmt: ShiftAmt))); |
| 864 | return InsertNewInstWith(New: Mul, Old: I->getIterator()); |
| 865 | } |
| 866 | } |
| 867 | |
| 868 | // Unsigned shift right. |
| 869 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
| 870 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) { |
| 871 | // exact flag may not longer hold. |
| 872 | I->dropPoisonGeneratingFlags(); |
| 873 | return I; |
| 874 | } |
| 875 | Known >>= ShiftAmt; |
| 876 | if (ShiftAmt) |
| 877 | Known.Zero.setHighBits(ShiftAmt); // high bits known zero. |
| 878 | break; |
| 879 | } |
| 880 | if (Value *V = |
| 881 | simplifyShiftSelectingPackedElement(I, DemandedMask, IC&: *this, Depth)) |
| 882 | return V; |
| 883 | |
| 884 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 885 | break; |
| 886 | } |
| 887 | case Instruction::AShr: { |
| 888 | unsigned SignBits = ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); |
| 889 | |
| 890 | // If we only want bits that already match the signbit then we don't need |
| 891 | // to shift. |
| 892 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); |
| 893 | if (SignBits >= NumHiDemandedBits) |
| 894 | return I->getOperand(i: 0); |
| 895 | |
| 896 | // If this is an arithmetic shift right and only the low-bit is set, we can |
| 897 | // always convert this into a logical shr, even if the shift amount is |
| 898 | // variable. The low bit of the shift cannot be an input sign bit unless |
| 899 | // the shift amount is >= the size of the datatype, which is undefined. |
| 900 | if (DemandedMask.isOne()) { |
| 901 | // Perform the logical shift right. |
| 902 | Instruction *NewVal = BinaryOperator::CreateLShr( |
| 903 | V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), Name: I->getName()); |
| 904 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
| 905 | } |
| 906 | |
| 907 | const APInt *SA; |
| 908 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
| 909 | uint32_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); |
| 910 | |
| 911 | // Signed shift right. |
| 912 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); |
| 913 | // If any of the bits being shifted in are demanded, then we should set |
| 914 | // the sign bit as demanded. |
| 915 | bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt; |
| 916 | if (ShiftedInBitsDemanded) |
| 917 | DemandedMaskIn.setSignBit(); |
| 918 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) { |
| 919 | // exact flag may not longer hold. |
| 920 | I->dropPoisonGeneratingFlags(); |
| 921 | return I; |
| 922 | } |
| 923 | |
| 924 | // If the input sign bit is known to be zero, or if none of the shifted in |
| 925 | // bits are demanded, turn this into an unsigned shift right. |
| 926 | if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) { |
| 927 | BinaryOperator *LShr = BinaryOperator::CreateLShr(V1: I->getOperand(i: 0), |
| 928 | V2: I->getOperand(i: 1)); |
| 929 | LShr->setIsExact(cast<BinaryOperator>(Val: I)->isExact()); |
| 930 | LShr->takeName(V: I); |
| 931 | return InsertNewInstWith(New: LShr, Old: I->getIterator()); |
| 932 | } |
| 933 | |
| 934 | Known = KnownBits::ashr( |
| 935 | LHS: Known, RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), |
| 936 | ShAmtNonZero: ShiftAmt != 0, Exact: I->isExact()); |
| 937 | } else { |
| 938 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 939 | } |
| 940 | break; |
| 941 | } |
| 942 | case Instruction::UDiv: { |
| 943 | // UDiv doesn't demand low bits that are zero in the divisor. |
| 944 | const APInt *SA; |
| 945 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { |
| 946 | // TODO: Take the demanded mask of the result into account. |
| 947 | unsigned RHSTrailingZeros = SA->countr_zero(); |
| 948 | APInt DemandedMaskIn = |
| 949 | APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - RHSTrailingZeros); |
| 950 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known&: LHSKnown, Q, Depth: Depth + 1)) { |
| 951 | // We can't guarantee that "exact" is still true after changing the |
| 952 | // the dividend. |
| 953 | I->dropPoisonGeneratingFlags(); |
| 954 | return I; |
| 955 | } |
| 956 | |
| 957 | Known = KnownBits::udiv(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *SA), |
| 958 | Exact: cast<BinaryOperator>(Val: I)->isExact()); |
| 959 | } else { |
| 960 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 961 | } |
| 962 | break; |
| 963 | } |
| 964 | case Instruction::SRem: { |
| 965 | const APInt *Rem; |
| 966 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Rem)) && Rem->isPowerOf2()) { |
| 967 | if (DemandedMask.ult(RHS: *Rem)) // srem won't affect demanded bits |
| 968 | return I->getOperand(i: 0); |
| 969 | |
| 970 | APInt LowBits = *Rem - 1; |
| 971 | APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); |
| 972 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: Mask2, Known&: LHSKnown, Q, Depth: Depth + 1)) |
| 973 | return I; |
| 974 | Known = KnownBits::srem(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *Rem)); |
| 975 | break; |
| 976 | } |
| 977 | |
| 978 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 979 | break; |
| 980 | } |
| 981 | case Instruction::Call: { |
| 982 | bool KnownBitsComputed = false; |
| 983 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
| 984 | switch (II->getIntrinsicID()) { |
| 985 | case Intrinsic::abs: { |
| 986 | if (DemandedMask == 1) |
| 987 | return II->getArgOperand(i: 0); |
| 988 | break; |
| 989 | } |
| 990 | case Intrinsic::ctpop: { |
| 991 | // Checking if the number of clear bits is odd (parity)? If the type has |
| 992 | // an even number of bits, that's the same as checking if the number of |
| 993 | // set bits is odd, so we can eliminate the 'not' op. |
| 994 | Value *X; |
| 995 | if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && |
| 996 | match(V: II->getArgOperand(i: 0), P: m_Not(V: m_Value(V&: X)))) { |
| 997 | Function *Ctpop = Intrinsic::getOrInsertDeclaration( |
| 998 | M: II->getModule(), id: Intrinsic::ctpop, Tys: VTy); |
| 999 | return InsertNewInstWith(New: CallInst::Create(Func: Ctpop, Args: {X}), Old: I->getIterator()); |
| 1000 | } |
| 1001 | break; |
| 1002 | } |
| 1003 | case Intrinsic::bswap: { |
| 1004 | // If the only bits demanded come from one byte of the bswap result, |
| 1005 | // just shift the input byte into position to eliminate the bswap. |
| 1006 | unsigned NLZ = DemandedMask.countl_zero(); |
| 1007 | unsigned NTZ = DemandedMask.countr_zero(); |
| 1008 | |
| 1009 | // Round NTZ down to the next byte. If we have 11 trailing zeros, then |
| 1010 | // we need all the bits down to bit 8. Likewise, round NLZ. If we |
| 1011 | // have 14 leading zeros, round to 8. |
| 1012 | NLZ = alignDown(Value: NLZ, Align: 8); |
| 1013 | NTZ = alignDown(Value: NTZ, Align: 8); |
| 1014 | // If we need exactly one byte, we can do this transformation. |
| 1015 | if (BitWidth - NLZ - NTZ == 8) { |
| 1016 | // Replace this with either a left or right shift to get the byte into |
| 1017 | // the right place. |
| 1018 | Instruction *NewVal; |
| 1019 | if (NLZ > NTZ) |
| 1020 | NewVal = BinaryOperator::CreateLShr( |
| 1021 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NLZ - NTZ)); |
| 1022 | else |
| 1023 | NewVal = BinaryOperator::CreateShl( |
| 1024 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NTZ - NLZ)); |
| 1025 | NewVal->takeName(V: I); |
| 1026 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); |
| 1027 | } |
| 1028 | break; |
| 1029 | } |
| 1030 | case Intrinsic::ptrmask: { |
| 1031 | unsigned MaskWidth = I->getOperand(i: 1)->getType()->getScalarSizeInBits(); |
| 1032 | RHSKnown = KnownBits(MaskWidth); |
| 1033 | // If either the LHS or the RHS are Zero, the result is zero. |
| 1034 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1) || |
| 1035 | SimplifyDemandedBits( |
| 1036 | I, OpNo: 1, DemandedMask: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth), |
| 1037 | Known&: RHSKnown, Q, Depth: Depth + 1)) |
| 1038 | return I; |
| 1039 | |
| 1040 | // TODO: Should be 1-extend |
| 1041 | RHSKnown = RHSKnown.anyextOrTrunc(BitWidth); |
| 1042 | |
| 1043 | Known = LHSKnown & RHSKnown; |
| 1044 | KnownBitsComputed = true; |
| 1045 | |
| 1046 | // If the client is only demanding bits we know to be zero, return |
| 1047 | // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer |
| 1048 | // provenance, but making the mask zero will be easily optimizable in |
| 1049 | // the backend. |
| 1050 | if (DemandedMask.isSubsetOf(RHS: Known.Zero) && |
| 1051 | !match(V: I->getOperand(i: 1), P: m_Zero())) |
| 1052 | return replaceOperand( |
| 1053 | I&: *I, OpNum: 1, V: Constant::getNullValue(Ty: I->getOperand(i: 1)->getType())); |
| 1054 | |
| 1055 | // Mask in demanded space does nothing. |
| 1056 | // NOTE: We may have attributes associated with the return value of the |
| 1057 | // llvm.ptrmask intrinsic that will be lost when we just return the |
| 1058 | // operand. We should try to preserve them. |
| 1059 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
| 1060 | return I->getOperand(i: 0); |
| 1061 | |
| 1062 | // If the RHS is a constant, see if we can simplify it. |
| 1063 | if (ShrinkDemandedConstant( |
| 1064 | I, OpNo: 1, Demanded: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth))) |
| 1065 | return I; |
| 1066 | |
| 1067 | // Combine: |
| 1068 | // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) |
| 1069 | // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) |
| 1070 | // where only the low bits known to be zero in the pointer are changed |
| 1071 | Value *InnerPtr; |
| 1072 | uint64_t GEPIndex; |
| 1073 | uint64_t PtrMaskImmediate; |
| 1074 | if (match(V: I, P: m_Intrinsic<Intrinsic::ptrmask>( |
| 1075 | Op0: m_PtrAdd(PointerOp: m_Value(V&: InnerPtr), OffsetOp: m_ConstantInt(V&: GEPIndex)), |
| 1076 | Op1: m_ConstantInt(V&: PtrMaskImmediate)))) { |
| 1077 | |
| 1078 | LHSKnown = computeKnownBits(V: InnerPtr, CxtI: I, Depth: Depth + 1); |
| 1079 | if (!LHSKnown.isZero()) { |
| 1080 | const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); |
| 1081 | uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; |
| 1082 | |
| 1083 | uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; |
| 1084 | uint64_t MaskedLowBitsGEPIndex = |
| 1085 | GEPIndex & PointerAlignBits & PtrMaskImmediate; |
| 1086 | |
| 1087 | uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; |
| 1088 | |
| 1089 | if (MaskedGEPIndex != GEPIndex) { |
| 1090 | auto *GEP = cast<GEPOperator>(Val: II->getArgOperand(i: 0)); |
| 1091 | Builder.SetInsertPoint(I); |
| 1092 | Type *GEPIndexType = |
| 1093 | DL.getIndexType(PtrTy: GEP->getPointerOperand()->getType()); |
| 1094 | Value *MaskedGEP = Builder.CreateGEP( |
| 1095 | Ty: GEP->getSourceElementType(), Ptr: InnerPtr, |
| 1096 | IdxList: ConstantInt::get(Ty: GEPIndexType, V: MaskedGEPIndex), |
| 1097 | Name: GEP->getName(), NW: GEP->isInBounds()); |
| 1098 | |
| 1099 | replaceOperand(I&: *I, OpNum: 0, V: MaskedGEP); |
| 1100 | return I; |
| 1101 | } |
| 1102 | } |
| 1103 | } |
| 1104 | |
| 1105 | break; |
| 1106 | } |
| 1107 | |
| 1108 | case Intrinsic::fshr: |
| 1109 | case Intrinsic::fshl: { |
| 1110 | const APInt *SA; |
| 1111 | if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA))) |
| 1112 | break; |
| 1113 | |
| 1114 | // Normalize to funnel shift left. APInt shifts of BitWidth are well- |
| 1115 | // defined, so no need to special-case zero shifts here. |
| 1116 | uint64_t ShiftAmt = SA->urem(RHS: BitWidth); |
| 1117 | if (II->getIntrinsicID() == Intrinsic::fshr) |
| 1118 | ShiftAmt = BitWidth - ShiftAmt; |
| 1119 | |
| 1120 | APInt DemandedMaskLHS(DemandedMask.lshr(shiftAmt: ShiftAmt)); |
| 1121 | APInt DemandedMaskRHS(DemandedMask.shl(shiftAmt: BitWidth - ShiftAmt)); |
| 1122 | if (I->getOperand(i: 0) != I->getOperand(i: 1)) { |
| 1123 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskLHS, Known&: LHSKnown, Q, |
| 1124 | Depth: Depth + 1) || |
| 1125 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedMaskRHS, Known&: RHSKnown, Q, |
| 1126 | Depth: Depth + 1)) { |
| 1127 | // Range attribute or metadata may no longer hold. |
| 1128 | I->dropPoisonGeneratingAnnotations(); |
| 1129 | return I; |
| 1130 | } |
| 1131 | } else { // fshl is a rotate |
| 1132 | // Avoid converting rotate into funnel shift. |
| 1133 | // Only simplify if one operand is constant. |
| 1134 | LHSKnown = computeKnownBits(V: I->getOperand(i: 0), CxtI: I, Depth: Depth + 1); |
| 1135 | if (DemandedMaskLHS.isSubsetOf(RHS: LHSKnown.Zero | LHSKnown.One) && |
| 1136 | !match(V: I->getOperand(i: 0), P: m_SpecificInt(V: LHSKnown.One))) { |
| 1137 | replaceOperand(I&: *I, OpNum: 0, V: Constant::getIntegerValue(Ty: VTy, V: LHSKnown.One)); |
| 1138 | return I; |
| 1139 | } |
| 1140 | |
| 1141 | RHSKnown = computeKnownBits(V: I->getOperand(i: 1), CxtI: I, Depth: Depth + 1); |
| 1142 | if (DemandedMaskRHS.isSubsetOf(RHS: RHSKnown.Zero | RHSKnown.One) && |
| 1143 | !match(V: I->getOperand(i: 1), P: m_SpecificInt(V: RHSKnown.One))) { |
| 1144 | replaceOperand(I&: *I, OpNum: 1, V: Constant::getIntegerValue(Ty: VTy, V: RHSKnown.One)); |
| 1145 | return I; |
| 1146 | } |
| 1147 | } |
| 1148 | |
| 1149 | LHSKnown <<= ShiftAmt; |
| 1150 | RHSKnown >>= BitWidth - ShiftAmt; |
| 1151 | Known = LHSKnown.unionWith(RHS: RHSKnown); |
| 1152 | KnownBitsComputed = true; |
| 1153 | break; |
| 1154 | } |
| 1155 | case Intrinsic::umax: { |
| 1156 | // UMax(A, C) == A if ... |
| 1157 | // The lowest non-zero bit of DemandMask is higher than the highest |
| 1158 | // non-zero bit of C. |
| 1159 | const APInt *C; |
| 1160 | unsigned CTZ = DemandedMask.countr_zero(); |
| 1161 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
| 1162 | CTZ >= C->getActiveBits()) |
| 1163 | return II->getArgOperand(i: 0); |
| 1164 | break; |
| 1165 | } |
| 1166 | case Intrinsic::umin: { |
| 1167 | // UMin(A, C) == A if ... |
| 1168 | // The lowest non-zero bit of DemandMask is higher than the highest |
| 1169 | // non-one bit of C. |
| 1170 | // This comes from using DeMorgans on the above umax example. |
| 1171 | const APInt *C; |
| 1172 | unsigned CTZ = DemandedMask.countr_zero(); |
| 1173 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && |
| 1174 | CTZ >= C->getBitWidth() - C->countl_one()) |
| 1175 | return II->getArgOperand(i: 0); |
| 1176 | break; |
| 1177 | } |
| 1178 | default: { |
| 1179 | // Handle target specific intrinsics |
| 1180 | std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( |
| 1181 | II&: *II, DemandedMask, Known, KnownBitsComputed); |
| 1182 | if (V) |
| 1183 | return *V; |
| 1184 | break; |
| 1185 | } |
| 1186 | } |
| 1187 | } |
| 1188 | |
| 1189 | if (!KnownBitsComputed) |
| 1190 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 1191 | break; |
| 1192 | } |
| 1193 | } |
| 1194 | |
| 1195 | if (I->getType()->isPointerTy()) { |
| 1196 | Align Alignment = I->getPointerAlignment(DL); |
| 1197 | Known.Zero.setLowBits(Log2(A: Alignment)); |
| 1198 | } |
| 1199 | |
| 1200 | // If the client is only demanding bits that we know, return the known |
| 1201 | // constant. We can't directly simplify pointers as a constant because of |
| 1202 | // pointer provenance. |
| 1203 | // TODO: We could return `(inttoptr const)` for pointers. |
| 1204 | if (!I->getType()->isPointerTy() && |
| 1205 | DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 1206 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); |
| 1207 | |
| 1208 | if (VerifyKnownBits) { |
| 1209 | KnownBits ReferenceKnown = llvm::computeKnownBits(V: I, Q, Depth); |
| 1210 | if (Known != ReferenceKnown) { |
| 1211 | errs() << "Mismatched known bits for " << *I << " in " |
| 1212 | << I->getFunction()->getName() << "\n" ; |
| 1213 | errs() << "computeKnownBits(): " << ReferenceKnown << "\n" ; |
| 1214 | errs() << "SimplifyDemandedBits(): " << Known << "\n" ; |
| 1215 | std::abort(); |
| 1216 | } |
| 1217 | } |
| 1218 | |
| 1219 | return nullptr; |
| 1220 | } |
| 1221 | |
| 1222 | /// Helper routine of SimplifyDemandedUseBits. It computes Known |
| 1223 | /// bits. It also tries to handle simplifications that can be done based on |
| 1224 | /// DemandedMask, but without modifying the Instruction. |
| 1225 | Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( |
| 1226 | Instruction *I, const APInt &DemandedMask, KnownBits &Known, |
| 1227 | const SimplifyQuery &Q, unsigned Depth) { |
| 1228 | unsigned BitWidth = DemandedMask.getBitWidth(); |
| 1229 | Type *ITy = I->getType(); |
| 1230 | |
| 1231 | KnownBits LHSKnown(BitWidth); |
| 1232 | KnownBits RHSKnown(BitWidth); |
| 1233 | |
| 1234 | // Despite the fact that we can't simplify this instruction in all User's |
| 1235 | // context, we can at least compute the known bits, and we can |
| 1236 | // do simplifications that apply to *just* the one user if we know that |
| 1237 | // this instruction has a simpler value in that context. |
| 1238 | switch (I->getOpcode()) { |
| 1239 | case Instruction::And: { |
| 1240 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); |
| 1241 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); |
| 1242 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 1243 | SQ: Q, Depth); |
| 1244 | computeKnownBitsFromContext(V: I, Known, Q, Depth); |
| 1245 | |
| 1246 | // If the client is only demanding bits that we know, return the known |
| 1247 | // constant. |
| 1248 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 1249 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); |
| 1250 | |
| 1251 | // If all of the demanded bits are known 1 on one side, return the other. |
| 1252 | // These bits cannot contribute to the result of the 'and' in this context. |
| 1253 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) |
| 1254 | return I->getOperand(i: 0); |
| 1255 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) |
| 1256 | return I->getOperand(i: 1); |
| 1257 | |
| 1258 | break; |
| 1259 | } |
| 1260 | case Instruction::Or: { |
| 1261 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); |
| 1262 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); |
| 1263 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 1264 | SQ: Q, Depth); |
| 1265 | computeKnownBitsFromContext(V: I, Known, Q, Depth); |
| 1266 | |
| 1267 | // If the client is only demanding bits that we know, return the known |
| 1268 | // constant. |
| 1269 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 1270 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); |
| 1271 | |
| 1272 | // We can simplify (X|Y) -> X or Y in the user's context if we know that |
| 1273 | // only bits from X or Y are demanded. |
| 1274 | // If all of the demanded bits are known zero on one side, return the other. |
| 1275 | // These bits cannot contribute to the result of the 'or' in this context. |
| 1276 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) |
| 1277 | return I->getOperand(i: 0); |
| 1278 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) |
| 1279 | return I->getOperand(i: 1); |
| 1280 | |
| 1281 | break; |
| 1282 | } |
| 1283 | case Instruction::Xor: { |
| 1284 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); |
| 1285 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); |
| 1286 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, |
| 1287 | SQ: Q, Depth); |
| 1288 | computeKnownBitsFromContext(V: I, Known, Q, Depth); |
| 1289 | |
| 1290 | // If the client is only demanding bits that we know, return the known |
| 1291 | // constant. |
| 1292 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 1293 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); |
| 1294 | |
| 1295 | // We can simplify (X^Y) -> X or Y in the user's context if we know that |
| 1296 | // only bits from X or Y are demanded. |
| 1297 | // If all of the demanded bits are known zero on one side, return the other. |
| 1298 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) |
| 1299 | return I->getOperand(i: 0); |
| 1300 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) |
| 1301 | return I->getOperand(i: 1); |
| 1302 | |
| 1303 | break; |
| 1304 | } |
| 1305 | case Instruction::Add: { |
| 1306 | unsigned NLZ = DemandedMask.countl_zero(); |
| 1307 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
| 1308 | |
| 1309 | // If an operand adds zeros to every bit below the highest demanded bit, |
| 1310 | // that operand doesn't change the result. Return the other side. |
| 1311 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); |
| 1312 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
| 1313 | return I->getOperand(i: 0); |
| 1314 | |
| 1315 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); |
| 1316 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) |
| 1317 | return I->getOperand(i: 1); |
| 1318 | |
| 1319 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
| 1320 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
| 1321 | Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); |
| 1322 | computeKnownBitsFromContext(V: I, Known, Q, Depth); |
| 1323 | break; |
| 1324 | } |
| 1325 | case Instruction::Sub: { |
| 1326 | unsigned NLZ = DemandedMask.countl_zero(); |
| 1327 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); |
| 1328 | |
| 1329 | // If an operand subtracts zeros from every bit below the highest demanded |
| 1330 | // bit, that operand doesn't change the result. Return the other side. |
| 1331 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); |
| 1332 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) |
| 1333 | return I->getOperand(i: 0); |
| 1334 | |
| 1335 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); |
| 1336 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); |
| 1337 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); |
| 1338 | Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); |
| 1339 | computeKnownBitsFromContext(V: I, Known, Q, Depth); |
| 1340 | break; |
| 1341 | } |
| 1342 | case Instruction::AShr: { |
| 1343 | // Compute the Known bits to simplify things downstream. |
| 1344 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 1345 | |
| 1346 | // If this user is only demanding bits that we know, return the known |
| 1347 | // constant. |
| 1348 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) |
| 1349 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); |
| 1350 | |
| 1351 | // If the right shift operand 0 is a result of a left shift by the same |
| 1352 | // amount, this is probably a zero/sign extension, which may be unnecessary, |
| 1353 | // if we do not demand any of the new sign bits. So, return the original |
| 1354 | // operand instead. |
| 1355 | const APInt *ShiftRC; |
| 1356 | const APInt *ShiftLC; |
| 1357 | Value *X; |
| 1358 | unsigned BitWidth = DemandedMask.getBitWidth(); |
| 1359 | if (match(V: I, |
| 1360 | P: m_AShr(L: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: ShiftLC)), R: m_APInt(Res&: ShiftRC))) && |
| 1361 | ShiftLC == ShiftRC && ShiftLC->ult(RHS: BitWidth) && |
| 1362 | DemandedMask.isSubsetOf(RHS: APInt::getLowBitsSet( |
| 1363 | numBits: BitWidth, loBitsSet: BitWidth - ShiftRC->getZExtValue()))) { |
| 1364 | return X; |
| 1365 | } |
| 1366 | |
| 1367 | break; |
| 1368 | } |
| 1369 | default: |
| 1370 | // Compute the Known bits to simplify things downstream. |
| 1371 | llvm::computeKnownBits(V: I, Known, Q, Depth); |
| 1372 | |
| 1373 | // If this user is only demanding bits that we know, return the known |
| 1374 | // constant. |
| 1375 | if (DemandedMask.isSubsetOf(RHS: Known.Zero|Known.One)) |
| 1376 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); |
| 1377 | |
| 1378 | break; |
| 1379 | } |
| 1380 | |
| 1381 | return nullptr; |
| 1382 | } |
| 1383 | |
| 1384 | /// Helper routine of SimplifyDemandedUseBits. It tries to simplify |
| 1385 | /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into |
| 1386 | /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign |
| 1387 | /// of "C2-C1". |
| 1388 | /// |
| 1389 | /// Suppose E1 and E2 are generally different in bits S={bm, bm+1, |
| 1390 | /// ..., bn}, without considering the specific value X is holding. |
| 1391 | /// This transformation is legal iff one of following conditions is hold: |
| 1392 | /// 1) All the bit in S are 0, in this case E1 == E2. |
| 1393 | /// 2) We don't care those bits in S, per the input DemandedMask. |
| 1394 | /// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the |
| 1395 | /// rest bits. |
| 1396 | /// |
| 1397 | /// Currently we only test condition 2). |
| 1398 | /// |
| 1399 | /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was |
| 1400 | /// not successful. |
| 1401 | Value *InstCombinerImpl::simplifyShrShlDemandedBits( |
| 1402 | Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, |
| 1403 | const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { |
| 1404 | if (!ShlOp1 || !ShrOp1) |
| 1405 | return nullptr; // No-op. |
| 1406 | |
| 1407 | Value *VarX = Shr->getOperand(i: 0); |
| 1408 | Type *Ty = VarX->getType(); |
| 1409 | unsigned BitWidth = Ty->getScalarSizeInBits(); |
| 1410 | if (ShlOp1.uge(RHS: BitWidth) || ShrOp1.uge(RHS: BitWidth)) |
| 1411 | return nullptr; // Undef. |
| 1412 | |
| 1413 | unsigned ShlAmt = ShlOp1.getZExtValue(); |
| 1414 | unsigned ShrAmt = ShrOp1.getZExtValue(); |
| 1415 | |
| 1416 | Known.One.clearAllBits(); |
| 1417 | Known.Zero.setLowBits(ShlAmt - 1); |
| 1418 | Known.Zero &= DemandedMask; |
| 1419 | |
| 1420 | APInt BitMask1(APInt::getAllOnes(numBits: BitWidth)); |
| 1421 | APInt BitMask2(APInt::getAllOnes(numBits: BitWidth)); |
| 1422 | |
| 1423 | bool isLshr = (Shr->getOpcode() == Instruction::LShr); |
| 1424 | BitMask1 = isLshr ? (BitMask1.lshr(shiftAmt: ShrAmt) << ShlAmt) : |
| 1425 | (BitMask1.ashr(ShiftAmt: ShrAmt) << ShlAmt); |
| 1426 | |
| 1427 | if (ShrAmt <= ShlAmt) { |
| 1428 | BitMask2 <<= (ShlAmt - ShrAmt); |
| 1429 | } else { |
| 1430 | BitMask2 = isLshr ? BitMask2.lshr(shiftAmt: ShrAmt - ShlAmt): |
| 1431 | BitMask2.ashr(ShiftAmt: ShrAmt - ShlAmt); |
| 1432 | } |
| 1433 | |
| 1434 | // Check if condition-2 (see the comment to this function) is satified. |
| 1435 | if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { |
| 1436 | if (ShrAmt == ShlAmt) |
| 1437 | return VarX; |
| 1438 | |
| 1439 | if (!Shr->hasOneUse()) |
| 1440 | return nullptr; |
| 1441 | |
| 1442 | BinaryOperator *New; |
| 1443 | if (ShrAmt < ShlAmt) { |
| 1444 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShlAmt - ShrAmt); |
| 1445 | New = BinaryOperator::CreateShl(V1: VarX, V2: Amt); |
| 1446 | BinaryOperator *Orig = cast<BinaryOperator>(Val: Shl); |
| 1447 | New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); |
| 1448 | New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); |
| 1449 | } else { |
| 1450 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShrAmt - ShlAmt); |
| 1451 | New = isLshr ? BinaryOperator::CreateLShr(V1: VarX, V2: Amt) : |
| 1452 | BinaryOperator::CreateAShr(V1: VarX, V2: Amt); |
| 1453 | if (cast<BinaryOperator>(Val: Shr)->isExact()) |
| 1454 | New->setIsExact(true); |
| 1455 | } |
| 1456 | |
| 1457 | return InsertNewInstWith(New, Old: Shl->getIterator()); |
| 1458 | } |
| 1459 | |
| 1460 | return nullptr; |
| 1461 | } |
| 1462 | |
| 1463 | /// The specified value produces a vector with any number of elements. |
| 1464 | /// This method analyzes which elements of the operand are poison and |
| 1465 | /// returns that information in PoisonElts. |
| 1466 | /// |
| 1467 | /// DemandedElts contains the set of elements that are actually used by the |
| 1468 | /// caller, and by default (AllowMultipleUsers equals false) the value is |
| 1469 | /// simplified only if it has a single caller. If AllowMultipleUsers is set |
| 1470 | /// to true, DemandedElts refers to the union of sets of elements that are |
| 1471 | /// used by all callers. |
| 1472 | /// |
| 1473 | /// If the information about demanded elements can be used to simplify the |
| 1474 | /// operation, the operation is simplified, then the resultant value is |
| 1475 | /// returned. This returns null if no change was made. |
| 1476 | Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, |
| 1477 | APInt DemandedElts, |
| 1478 | APInt &PoisonElts, |
| 1479 | unsigned Depth, |
| 1480 | bool AllowMultipleUsers) { |
| 1481 | // Cannot analyze scalable type. The number of vector elements is not a |
| 1482 | // compile-time constant. |
| 1483 | if (isa<ScalableVectorType>(Val: V->getType())) |
| 1484 | return nullptr; |
| 1485 | |
| 1486 | unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements(); |
| 1487 | APInt EltMask(APInt::getAllOnes(numBits: VWidth)); |
| 1488 | assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!" ); |
| 1489 | |
| 1490 | if (match(V, P: m_Poison())) { |
| 1491 | // If the entire vector is poison, just return this info. |
| 1492 | PoisonElts = EltMask; |
| 1493 | return nullptr; |
| 1494 | } |
| 1495 | |
| 1496 | if (DemandedElts.isZero()) { // If nothing is demanded, provide poison. |
| 1497 | PoisonElts = EltMask; |
| 1498 | return PoisonValue::get(T: V->getType()); |
| 1499 | } |
| 1500 | |
| 1501 | PoisonElts = 0; |
| 1502 | |
| 1503 | if (auto *C = dyn_cast<Constant>(Val: V)) { |
| 1504 | // Check if this is identity. If so, return 0 since we are not simplifying |
| 1505 | // anything. |
| 1506 | if (DemandedElts.isAllOnes()) |
| 1507 | return nullptr; |
| 1508 | |
| 1509 | Type *EltTy = cast<VectorType>(Val: V->getType())->getElementType(); |
| 1510 | Constant *Poison = PoisonValue::get(T: EltTy); |
| 1511 | SmallVector<Constant*, 16> Elts; |
| 1512 | for (unsigned i = 0; i != VWidth; ++i) { |
| 1513 | if (!DemandedElts[i]) { // If not demanded, set to poison. |
| 1514 | Elts.push_back(Elt: Poison); |
| 1515 | PoisonElts.setBit(i); |
| 1516 | continue; |
| 1517 | } |
| 1518 | |
| 1519 | Constant *Elt = C->getAggregateElement(Elt: i); |
| 1520 | if (!Elt) return nullptr; |
| 1521 | |
| 1522 | Elts.push_back(Elt); |
| 1523 | if (isa<PoisonValue>(Val: Elt)) // Already poison. |
| 1524 | PoisonElts.setBit(i); |
| 1525 | } |
| 1526 | |
| 1527 | // If we changed the constant, return it. |
| 1528 | Constant *NewCV = ConstantVector::get(V: Elts); |
| 1529 | return NewCV != C ? NewCV : nullptr; |
| 1530 | } |
| 1531 | |
| 1532 | // Limit search depth. |
| 1533 | if (Depth == SimplifyDemandedVectorEltsDepthLimit) |
| 1534 | return nullptr; |
| 1535 | |
| 1536 | if (!AllowMultipleUsers) { |
| 1537 | // If multiple users are using the root value, proceed with |
| 1538 | // simplification conservatively assuming that all elements |
| 1539 | // are needed. |
| 1540 | if (!V->hasOneUse()) { |
| 1541 | // Quit if we find multiple users of a non-root value though. |
| 1542 | // They'll be handled when it's their turn to be visited by |
| 1543 | // the main instcombine process. |
| 1544 | if (Depth != 0) |
| 1545 | // TODO: Just compute the PoisonElts information recursively. |
| 1546 | return nullptr; |
| 1547 | |
| 1548 | // Conservatively assume that all elements are needed. |
| 1549 | DemandedElts = EltMask; |
| 1550 | } |
| 1551 | } |
| 1552 | |
| 1553 | Instruction *I = dyn_cast<Instruction>(Val: V); |
| 1554 | if (!I) return nullptr; // Only analyze instructions. |
| 1555 | |
| 1556 | bool MadeChange = false; |
| 1557 | auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum, |
| 1558 | APInt Demanded, APInt &Undef) { |
| 1559 | auto *II = dyn_cast<IntrinsicInst>(Val: Inst); |
| 1560 | Value *Op = II ? II->getArgOperand(i: OpNum) : Inst->getOperand(i: OpNum); |
| 1561 | if (Value *V = SimplifyDemandedVectorElts(V: Op, DemandedElts: Demanded, PoisonElts&: Undef, Depth: Depth + 1)) { |
| 1562 | replaceOperand(I&: *Inst, OpNum, V); |
| 1563 | MadeChange = true; |
| 1564 | } |
| 1565 | }; |
| 1566 | |
| 1567 | APInt PoisonElts2(VWidth, 0); |
| 1568 | APInt PoisonElts3(VWidth, 0); |
| 1569 | switch (I->getOpcode()) { |
| 1570 | default: break; |
| 1571 | |
| 1572 | case Instruction::GetElementPtr: { |
| 1573 | // The LangRef requires that struct geps have all constant indices. As |
| 1574 | // such, we can't convert any operand to partial undef. |
| 1575 | auto mayIndexStructType = [](GetElementPtrInst &GEP) { |
| 1576 | for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP); |
| 1577 | I != E; I++) |
| 1578 | if (I.isStruct()) |
| 1579 | return true; |
| 1580 | return false; |
| 1581 | }; |
| 1582 | if (mayIndexStructType(cast<GetElementPtrInst>(Val&: *I))) |
| 1583 | break; |
| 1584 | |
| 1585 | // Conservatively track the demanded elements back through any vector |
| 1586 | // operands we may have. We know there must be at least one, or we |
| 1587 | // wouldn't have a vector result to get here. Note that we intentionally |
| 1588 | // merge the undef bits here since gepping with either an poison base or |
| 1589 | // index results in poison. |
| 1590 | for (unsigned i = 0; i < I->getNumOperands(); i++) { |
| 1591 | if (i == 0 ? match(V: I->getOperand(i), P: m_Undef()) |
| 1592 | : match(V: I->getOperand(i), P: m_Poison())) { |
| 1593 | // If the entire vector is undefined, just return this info. |
| 1594 | PoisonElts = EltMask; |
| 1595 | return nullptr; |
| 1596 | } |
| 1597 | if (I->getOperand(i)->getType()->isVectorTy()) { |
| 1598 | APInt PoisonEltsOp(VWidth, 0); |
| 1599 | simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp); |
| 1600 | // gep(x, undef) is not undef, so skip considering idx ops here |
| 1601 | // Note that we could propagate poison, but we can't distinguish between |
| 1602 | // undef & poison bits ATM |
| 1603 | if (i == 0) |
| 1604 | PoisonElts |= PoisonEltsOp; |
| 1605 | } |
| 1606 | } |
| 1607 | |
| 1608 | break; |
| 1609 | } |
| 1610 | case Instruction::InsertElement: { |
| 1611 | // If this is a variable index, we don't know which element it overwrites. |
| 1612 | // demand exactly the same input as we produce. |
| 1613 | ConstantInt *Idx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2)); |
| 1614 | if (!Idx) { |
| 1615 | // Note that we can't propagate undef elt info, because we don't know |
| 1616 | // which elt is getting updated. |
| 1617 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2); |
| 1618 | break; |
| 1619 | } |
| 1620 | |
| 1621 | // The element inserted overwrites whatever was there, so the input demanded |
| 1622 | // set is simpler than the output set. |
| 1623 | unsigned IdxNo = Idx->getZExtValue(); |
| 1624 | APInt PreInsertDemandedElts = DemandedElts; |
| 1625 | if (IdxNo < VWidth) |
| 1626 | PreInsertDemandedElts.clearBit(BitPosition: IdxNo); |
| 1627 | |
| 1628 | // If we only demand the element that is being inserted and that element |
| 1629 | // was extracted from the same index in another vector with the same type, |
| 1630 | // replace this insert with that other vector. |
| 1631 | // Note: This is attempted before the call to simplifyAndSetOp because that |
| 1632 | // may change PoisonElts to a value that does not match with Vec. |
| 1633 | Value *Vec; |
| 1634 | if (PreInsertDemandedElts == 0 && |
| 1635 | match(V: I->getOperand(i: 1), |
| 1636 | P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_SpecificInt(V: IdxNo))) && |
| 1637 | Vec->getType() == I->getType()) { |
| 1638 | return Vec; |
| 1639 | } |
| 1640 | |
| 1641 | simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts); |
| 1642 | |
| 1643 | // If this is inserting an element that isn't demanded, remove this |
| 1644 | // insertelement. |
| 1645 | if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { |
| 1646 | Worklist.push(I); |
| 1647 | return I->getOperand(i: 0); |
| 1648 | } |
| 1649 | |
| 1650 | // The inserted element is defined. |
| 1651 | PoisonElts.clearBit(BitPosition: IdxNo); |
| 1652 | break; |
| 1653 | } |
| 1654 | case Instruction::ShuffleVector: { |
| 1655 | auto *Shuffle = cast<ShuffleVectorInst>(Val: I); |
| 1656 | assert(Shuffle->getOperand(0)->getType() == |
| 1657 | Shuffle->getOperand(1)->getType() && |
| 1658 | "Expected shuffle operands to have same type" ); |
| 1659 | unsigned OpWidth = cast<FixedVectorType>(Val: Shuffle->getOperand(i_nocapture: 0)->getType()) |
| 1660 | ->getNumElements(); |
| 1661 | // Handle trivial case of a splat. Only check the first element of LHS |
| 1662 | // operand. |
| 1663 | if (all_of(Range: Shuffle->getShuffleMask(), P: equal_to(Arg: 0)) && |
| 1664 | DemandedElts.isAllOnes()) { |
| 1665 | if (!isa<PoisonValue>(Val: I->getOperand(i: 1))) { |
| 1666 | I->setOperand(i: 1, Val: PoisonValue::get(T: I->getOperand(i: 1)->getType())); |
| 1667 | MadeChange = true; |
| 1668 | } |
| 1669 | APInt LeftDemanded(OpWidth, 1); |
| 1670 | APInt LHSPoisonElts(OpWidth, 0); |
| 1671 | simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); |
| 1672 | if (LHSPoisonElts[0]) |
| 1673 | PoisonElts = EltMask; |
| 1674 | else |
| 1675 | PoisonElts.clearAllBits(); |
| 1676 | break; |
| 1677 | } |
| 1678 | |
| 1679 | APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0); |
| 1680 | for (unsigned i = 0; i < VWidth; i++) { |
| 1681 | if (DemandedElts[i]) { |
| 1682 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); |
| 1683 | if (MaskVal != -1u) { |
| 1684 | assert(MaskVal < OpWidth * 2 && |
| 1685 | "shufflevector mask index out of range!" ); |
| 1686 | if (MaskVal < OpWidth) |
| 1687 | LeftDemanded.setBit(MaskVal); |
| 1688 | else |
| 1689 | RightDemanded.setBit(MaskVal - OpWidth); |
| 1690 | } |
| 1691 | } |
| 1692 | } |
| 1693 | |
| 1694 | APInt LHSPoisonElts(OpWidth, 0); |
| 1695 | simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); |
| 1696 | |
| 1697 | APInt RHSPoisonElts(OpWidth, 0); |
| 1698 | simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts); |
| 1699 | |
| 1700 | // If this shuffle does not change the vector length and the elements |
| 1701 | // demanded by this shuffle are an identity mask, then this shuffle is |
| 1702 | // unnecessary. |
| 1703 | // |
| 1704 | // We are assuming canonical form for the mask, so the source vector is |
| 1705 | // operand 0 and operand 1 is not used. |
| 1706 | // |
| 1707 | // Note that if an element is demanded and this shuffle mask is undefined |
| 1708 | // for that element, then the shuffle is not considered an identity |
| 1709 | // operation. The shuffle prevents poison from the operand vector from |
| 1710 | // leaking to the result by replacing poison with an undefined value. |
| 1711 | if (VWidth == OpWidth) { |
| 1712 | bool IsIdentityShuffle = true; |
| 1713 | for (unsigned i = 0; i < VWidth; i++) { |
| 1714 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); |
| 1715 | if (DemandedElts[i] && i != MaskVal) { |
| 1716 | IsIdentityShuffle = false; |
| 1717 | break; |
| 1718 | } |
| 1719 | } |
| 1720 | if (IsIdentityShuffle) |
| 1721 | return Shuffle->getOperand(i_nocapture: 0); |
| 1722 | } |
| 1723 | |
| 1724 | bool NewPoisonElts = false; |
| 1725 | unsigned LHSIdx = -1u, LHSValIdx = -1u; |
| 1726 | unsigned RHSIdx = -1u, RHSValIdx = -1u; |
| 1727 | bool LHSUniform = true; |
| 1728 | bool RHSUniform = true; |
| 1729 | for (unsigned i = 0; i < VWidth; i++) { |
| 1730 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); |
| 1731 | if (MaskVal == -1u) { |
| 1732 | PoisonElts.setBit(i); |
| 1733 | } else if (!DemandedElts[i]) { |
| 1734 | NewPoisonElts = true; |
| 1735 | PoisonElts.setBit(i); |
| 1736 | } else if (MaskVal < OpWidth) { |
| 1737 | if (LHSPoisonElts[MaskVal]) { |
| 1738 | NewPoisonElts = true; |
| 1739 | PoisonElts.setBit(i); |
| 1740 | } else { |
| 1741 | LHSIdx = LHSIdx == -1u ? i : OpWidth; |
| 1742 | LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth; |
| 1743 | LHSUniform = LHSUniform && (MaskVal == i); |
| 1744 | } |
| 1745 | } else { |
| 1746 | if (RHSPoisonElts[MaskVal - OpWidth]) { |
| 1747 | NewPoisonElts = true; |
| 1748 | PoisonElts.setBit(i); |
| 1749 | } else { |
| 1750 | RHSIdx = RHSIdx == -1u ? i : OpWidth; |
| 1751 | RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth; |
| 1752 | RHSUniform = RHSUniform && (MaskVal - OpWidth == i); |
| 1753 | } |
| 1754 | } |
| 1755 | } |
| 1756 | |
| 1757 | // Try to transform shuffle with constant vector and single element from |
| 1758 | // this constant vector to single insertelement instruction. |
| 1759 | // shufflevector V, C, <v1, v2, .., ci, .., vm> -> |
| 1760 | // insertelement V, C[ci], ci-n |
| 1761 | if (OpWidth == |
| 1762 | cast<FixedVectorType>(Val: Shuffle->getType())->getNumElements()) { |
| 1763 | Value *Op = nullptr; |
| 1764 | Constant *Value = nullptr; |
| 1765 | unsigned Idx = -1u; |
| 1766 | |
| 1767 | // Find constant vector with the single element in shuffle (LHS or RHS). |
| 1768 | if (LHSIdx < OpWidth && RHSUniform) { |
| 1769 | if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 0))) { |
| 1770 | Op = Shuffle->getOperand(i_nocapture: 1); |
| 1771 | Value = CV->getOperand(i_nocapture: LHSValIdx); |
| 1772 | Idx = LHSIdx; |
| 1773 | } |
| 1774 | } |
| 1775 | if (RHSIdx < OpWidth && LHSUniform) { |
| 1776 | if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 1))) { |
| 1777 | Op = Shuffle->getOperand(i_nocapture: 0); |
| 1778 | Value = CV->getOperand(i_nocapture: RHSValIdx); |
| 1779 | Idx = RHSIdx; |
| 1780 | } |
| 1781 | } |
| 1782 | // Found constant vector with single element - convert to insertelement. |
| 1783 | if (Op && Value) { |
| 1784 | Instruction *New = InsertElementInst::Create( |
| 1785 | Vec: Op, NewElt: Value, Idx: ConstantInt::get(Ty: Type::getInt64Ty(C&: I->getContext()), V: Idx), |
| 1786 | NameStr: Shuffle->getName()); |
| 1787 | InsertNewInstWith(New, Old: Shuffle->getIterator()); |
| 1788 | return New; |
| 1789 | } |
| 1790 | } |
| 1791 | if (NewPoisonElts) { |
| 1792 | // Add additional discovered undefs. |
| 1793 | SmallVector<int, 16> Elts; |
| 1794 | for (unsigned i = 0; i < VWidth; ++i) { |
| 1795 | if (PoisonElts[i]) |
| 1796 | Elts.push_back(Elt: PoisonMaskElem); |
| 1797 | else |
| 1798 | Elts.push_back(Elt: Shuffle->getMaskValue(Elt: i)); |
| 1799 | } |
| 1800 | Shuffle->setShuffleMask(Elts); |
| 1801 | MadeChange = true; |
| 1802 | } |
| 1803 | break; |
| 1804 | } |
| 1805 | case Instruction::Select: { |
| 1806 | // If this is a vector select, try to transform the select condition based |
| 1807 | // on the current demanded elements. |
| 1808 | SelectInst *Sel = cast<SelectInst>(Val: I); |
| 1809 | if (Sel->getCondition()->getType()->isVectorTy()) { |
| 1810 | // TODO: We are not doing anything with PoisonElts based on this call. |
| 1811 | // It is overwritten below based on the other select operands. If an |
| 1812 | // element of the select condition is known undef, then we are free to |
| 1813 | // choose the output value from either arm of the select. If we know that |
| 1814 | // one of those values is undef, then the output can be undef. |
| 1815 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); |
| 1816 | } |
| 1817 | |
| 1818 | // Next, see if we can transform the arms of the select. |
| 1819 | APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts); |
| 1820 | if (auto *CV = dyn_cast<ConstantVector>(Val: Sel->getCondition())) { |
| 1821 | for (unsigned i = 0; i < VWidth; i++) { |
| 1822 | Constant *CElt = CV->getAggregateElement(Elt: i); |
| 1823 | |
| 1824 | // isNullValue() always returns false when called on a ConstantExpr. |
| 1825 | if (CElt->isNullValue()) |
| 1826 | DemandedLHS.clearBit(BitPosition: i); |
| 1827 | else if (CElt->isOneValue()) |
| 1828 | DemandedRHS.clearBit(BitPosition: i); |
| 1829 | } |
| 1830 | } |
| 1831 | |
| 1832 | simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2); |
| 1833 | simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3); |
| 1834 | |
| 1835 | // Output elements are undefined if the element from each arm is undefined. |
| 1836 | // TODO: This can be improved. See comment in select condition handling. |
| 1837 | PoisonElts = PoisonElts2 & PoisonElts3; |
| 1838 | break; |
| 1839 | } |
| 1840 | case Instruction::BitCast: { |
| 1841 | // Vector->vector casts only. |
| 1842 | VectorType *VTy = dyn_cast<VectorType>(Val: I->getOperand(i: 0)->getType()); |
| 1843 | if (!VTy) break; |
| 1844 | unsigned InVWidth = cast<FixedVectorType>(Val: VTy)->getNumElements(); |
| 1845 | APInt InputDemandedElts(InVWidth, 0); |
| 1846 | PoisonElts2 = APInt(InVWidth, 0); |
| 1847 | unsigned Ratio; |
| 1848 | |
| 1849 | if (VWidth == InVWidth) { |
| 1850 | // If we are converting from <4 x i32> -> <4 x f32>, we demand the same |
| 1851 | // elements as are demanded of us. |
| 1852 | Ratio = 1; |
| 1853 | InputDemandedElts = DemandedElts; |
| 1854 | } else if ((VWidth % InVWidth) == 0) { |
| 1855 | // If the number of elements in the output is a multiple of the number of |
| 1856 | // elements in the input then an input element is live if any of the |
| 1857 | // corresponding output elements are live. |
| 1858 | Ratio = VWidth / InVWidth; |
| 1859 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) |
| 1860 | if (DemandedElts[OutIdx]) |
| 1861 | InputDemandedElts.setBit(OutIdx / Ratio); |
| 1862 | } else if ((InVWidth % VWidth) == 0) { |
| 1863 | // If the number of elements in the input is a multiple of the number of |
| 1864 | // elements in the output then an input element is live if the |
| 1865 | // corresponding output element is live. |
| 1866 | Ratio = InVWidth / VWidth; |
| 1867 | for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) |
| 1868 | if (DemandedElts[InIdx / Ratio]) |
| 1869 | InputDemandedElts.setBit(InIdx); |
| 1870 | } else { |
| 1871 | // Unsupported so far. |
| 1872 | break; |
| 1873 | } |
| 1874 | |
| 1875 | simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2); |
| 1876 | |
| 1877 | if (VWidth == InVWidth) { |
| 1878 | PoisonElts = PoisonElts2; |
| 1879 | } else if ((VWidth % InVWidth) == 0) { |
| 1880 | // If the number of elements in the output is a multiple of the number of |
| 1881 | // elements in the input then an output element is undef if the |
| 1882 | // corresponding input element is undef. |
| 1883 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) |
| 1884 | if (PoisonElts2[OutIdx / Ratio]) |
| 1885 | PoisonElts.setBit(OutIdx); |
| 1886 | } else if ((InVWidth % VWidth) == 0) { |
| 1887 | // If the number of elements in the input is a multiple of the number of |
| 1888 | // elements in the output then an output element is undef if all of the |
| 1889 | // corresponding input elements are undef. |
| 1890 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { |
| 1891 | APInt SubUndef = PoisonElts2.lshr(shiftAmt: OutIdx * Ratio).zextOrTrunc(width: Ratio); |
| 1892 | if (SubUndef.popcount() == Ratio) |
| 1893 | PoisonElts.setBit(OutIdx); |
| 1894 | } |
| 1895 | } else { |
| 1896 | llvm_unreachable("Unimp" ); |
| 1897 | } |
| 1898 | break; |
| 1899 | } |
| 1900 | case Instruction::FPTrunc: |
| 1901 | case Instruction::FPExt: |
| 1902 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); |
| 1903 | break; |
| 1904 | |
| 1905 | case Instruction::Call: { |
| 1906 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I); |
| 1907 | if (!II) break; |
| 1908 | switch (II->getIntrinsicID()) { |
| 1909 | case Intrinsic::masked_gather: // fallthrough |
| 1910 | case Intrinsic::masked_load: { |
| 1911 | // Subtlety: If we load from a pointer, the pointer must be valid |
| 1912 | // regardless of whether the element is demanded. Doing otherwise risks |
| 1913 | // segfaults which didn't exist in the original program. |
| 1914 | APInt DemandedPtrs(APInt::getAllOnes(numBits: VWidth)), |
| 1915 | DemandedPassThrough(DemandedElts); |
| 1916 | if (auto *CMask = dyn_cast<Constant>(Val: II->getOperand(i_nocapture: 1))) { |
| 1917 | for (unsigned i = 0; i < VWidth; i++) { |
| 1918 | if (Constant *CElt = CMask->getAggregateElement(Elt: i)) { |
| 1919 | if (CElt->isNullValue()) |
| 1920 | DemandedPtrs.clearBit(BitPosition: i); |
| 1921 | else if (CElt->isAllOnesValue()) |
| 1922 | DemandedPassThrough.clearBit(BitPosition: i); |
| 1923 | } |
| 1924 | } |
| 1925 | } |
| 1926 | |
| 1927 | if (II->getIntrinsicID() == Intrinsic::masked_gather) |
| 1928 | simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2); |
| 1929 | simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3); |
| 1930 | |
| 1931 | // Output elements are undefined if the element from both sources are. |
| 1932 | // TODO: can strengthen via mask as well. |
| 1933 | PoisonElts = PoisonElts2 & PoisonElts3; |
| 1934 | break; |
| 1935 | } |
| 1936 | default: { |
| 1937 | // Handle target specific intrinsics |
| 1938 | std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( |
| 1939 | II&: *II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
| 1940 | SimplifyAndSetOp: simplifyAndSetOp); |
| 1941 | if (V) |
| 1942 | return *V; |
| 1943 | break; |
| 1944 | } |
| 1945 | } // switch on IntrinsicID |
| 1946 | break; |
| 1947 | } // case Call |
| 1948 | } // switch on Opcode |
| 1949 | |
| 1950 | // TODO: We bail completely on integer div/rem and shifts because they have |
| 1951 | // UB/poison potential, but that should be refined. |
| 1952 | BinaryOperator *BO; |
| 1953 | if (match(V: I, P: m_BinOp(I&: BO)) && !BO->isIntDivRem() && !BO->isShift()) { |
| 1954 | Value *X = BO->getOperand(i_nocapture: 0); |
| 1955 | Value *Y = BO->getOperand(i_nocapture: 1); |
| 1956 | |
| 1957 | // Look for an equivalent binop except that one operand has been shuffled. |
| 1958 | // If the demand for this binop only includes elements that are the same as |
| 1959 | // the other binop, then we may be able to replace this binop with a use of |
| 1960 | // the earlier one. |
| 1961 | // |
| 1962 | // Example: |
| 1963 | // %other_bo = bo (shuf X, {0}), Y |
| 1964 | // %this_extracted_bo = extelt (bo X, Y), 0 |
| 1965 | // --> |
| 1966 | // %other_bo = bo (shuf X, {0}), Y |
| 1967 | // %this_extracted_bo = extelt %other_bo, 0 |
| 1968 | // |
| 1969 | // TODO: Handle demand of an arbitrary single element or more than one |
| 1970 | // element instead of just element 0. |
| 1971 | // TODO: Unlike general demanded elements transforms, this should be safe |
| 1972 | // for any (div/rem/shift) opcode too. |
| 1973 | if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() && |
| 1974 | BO->hasOneUse() ) { |
| 1975 | |
| 1976 | auto findShufBO = [&](bool MatchShufAsOp0) -> User * { |
| 1977 | // Try to use shuffle-of-operand in place of an operand: |
| 1978 | // bo X, Y --> bo (shuf X), Y |
| 1979 | // bo X, Y --> bo X, (shuf Y) |
| 1980 | |
| 1981 | Value *OtherOp = MatchShufAsOp0 ? Y : X; |
| 1982 | if (!OtherOp->hasUseList()) |
| 1983 | return nullptr; |
| 1984 | |
| 1985 | BinaryOperator::BinaryOps Opcode = BO->getOpcode(); |
| 1986 | Value *ShufOp = MatchShufAsOp0 ? X : Y; |
| 1987 | |
| 1988 | for (User *U : OtherOp->users()) { |
| 1989 | ArrayRef<int> Mask; |
| 1990 | auto Shuf = m_Shuffle(v1: m_Specific(V: ShufOp), v2: m_Value(), mask: m_Mask(Mask)); |
| 1991 | if (BO->isCommutative() |
| 1992 | ? match(V: U, P: m_c_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp))) |
| 1993 | : MatchShufAsOp0 |
| 1994 | ? match(V: U, P: m_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp))) |
| 1995 | : match(V: U, P: m_BinOp(Opcode, L: m_Specific(V: OtherOp), R: Shuf))) |
| 1996 | if (match(Mask, P: m_ZeroMask()) && Mask[0] != PoisonMaskElem) |
| 1997 | if (DT.dominates(Def: U, User: I)) |
| 1998 | return U; |
| 1999 | } |
| 2000 | return nullptr; |
| 2001 | }; |
| 2002 | |
| 2003 | if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true)) |
| 2004 | return ShufBO; |
| 2005 | if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false)) |
| 2006 | return ShufBO; |
| 2007 | } |
| 2008 | |
| 2009 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); |
| 2010 | simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2); |
| 2011 | |
| 2012 | // Output elements are undefined if both are undefined. Consider things |
| 2013 | // like undef & 0. The result is known zero, not undef. |
| 2014 | PoisonElts &= PoisonElts2; |
| 2015 | } |
| 2016 | |
| 2017 | // If we've proven all of the lanes poison, return a poison value. |
| 2018 | // TODO: Intersect w/demanded lanes |
| 2019 | if (PoisonElts.isAllOnes()) |
| 2020 | return PoisonValue::get(T: I->getType()); |
| 2021 | |
| 2022 | return MadeChange ? I : nullptr; |
| 2023 | } |
| 2024 | |
| 2025 | /// For floating-point classes that resolve to a single bit pattern, return that |
| 2026 | /// value. |
| 2027 | static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask, |
| 2028 | bool IsCanonicalizing = false) { |
| 2029 | if (Mask == fcNone) |
| 2030 | return PoisonValue::get(T: Ty); |
| 2031 | |
| 2032 | if (Mask == fcPosZero) |
| 2033 | return Constant::getNullValue(Ty); |
| 2034 | |
| 2035 | // TODO: Support aggregate types that are allowed by FPMathOperator. |
| 2036 | if (Ty->isAggregateType()) |
| 2037 | return nullptr; |
| 2038 | |
| 2039 | // Turn any possible snans into quiet if we can. |
| 2040 | if (Mask == fcNan && IsCanonicalizing) |
| 2041 | return ConstantFP::getQNaN(Ty); |
| 2042 | |
| 2043 | switch (Mask) { |
| 2044 | case fcNegZero: |
| 2045 | return ConstantFP::getZero(Ty, Negative: true); |
| 2046 | case fcPosInf: |
| 2047 | return ConstantFP::getInfinity(Ty); |
| 2048 | case fcNegInf: |
| 2049 | return ConstantFP::getInfinity(Ty, Negative: true); |
| 2050 | case fcQNan: |
| 2051 | // Payload bits cannot be dropped for pure signbit operations. |
| 2052 | return IsCanonicalizing ? ConstantFP::getQNaN(Ty) : nullptr; |
| 2053 | default: |
| 2054 | return nullptr; |
| 2055 | } |
| 2056 | } |
| 2057 | |
| 2058 | /// Perform multiple-use aware simplfications for fabs(\p Src). Returns a |
| 2059 | /// replacement value if it's simplified, otherwise nullptr. Updates \p Known |
| 2060 | /// with the known fpclass if not simplified. |
| 2061 | static Value *simplifyDemandedFPClassFabs(KnownFPClass &Known, Value *Src, |
| 2062 | FPClassTest DemandedMask, |
| 2063 | KnownFPClass KnownSrc, bool NSZ) { |
| 2064 | if ((DemandedMask & fcNan) == fcNone) |
| 2065 | KnownSrc.knownNot(RuleOut: fcNan); |
| 2066 | if ((DemandedMask & fcInf) == fcNone) |
| 2067 | KnownSrc.knownNot(RuleOut: fcInf); |
| 2068 | |
| 2069 | if (KnownSrc.SignBit == false || |
| 2070 | ((DemandedMask & fcNan) == fcNone && KnownSrc.isKnownNever(Mask: fcNegative))) |
| 2071 | return Src; |
| 2072 | |
| 2073 | // If the only sign bit difference is due to -0, ignore it with nsz |
| 2074 | if (NSZ && |
| 2075 | KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedLessThanZeroMask | fcNan)) |
| 2076 | return Src; |
| 2077 | |
| 2078 | Known = KnownFPClass::fabs(Src: KnownSrc); |
| 2079 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2080 | return nullptr; |
| 2081 | } |
| 2082 | |
| 2083 | /// Try to set an inferred no-nans or no-infs in \p FMF. \p ValidResults is a |
| 2084 | /// mask of known valid results for the operator (already computed from the |
| 2085 | /// result, and the known operand inputs in \p Known) |
| 2086 | static FastMathFlags inferFastMathValueFlags(FastMathFlags FMF, |
| 2087 | FPClassTest ValidResults, |
| 2088 | ArrayRef<KnownFPClass> Known) { |
| 2089 | if (!FMF.noNaNs() && (ValidResults & fcNan) == fcNone) { |
| 2090 | if (all_of(Range&: Known, P: [](const KnownFPClass KnownSrc) { |
| 2091 | return KnownSrc.isKnownNeverNaN(); |
| 2092 | })) |
| 2093 | FMF.setNoNaNs(); |
| 2094 | } |
| 2095 | |
| 2096 | if (!FMF.noInfs() && (ValidResults & fcInf) == fcNone) { |
| 2097 | if (all_of(Range&: Known, P: [](const KnownFPClass KnownSrc) { |
| 2098 | return KnownSrc.isKnownNeverInfinity(); |
| 2099 | })) |
| 2100 | FMF.setNoInfs(); |
| 2101 | } |
| 2102 | |
| 2103 | return FMF; |
| 2104 | } |
| 2105 | |
| 2106 | static FPClassTest adjustDemandedMaskFromFlags(FPClassTest DemandedMask, |
| 2107 | FastMathFlags FMF) { |
| 2108 | if (FMF.noNaNs()) |
| 2109 | DemandedMask &= ~fcNan; |
| 2110 | |
| 2111 | if (FMF.noInfs()) |
| 2112 | DemandedMask &= ~fcInf; |
| 2113 | return DemandedMask; |
| 2114 | } |
| 2115 | |
| 2116 | /// Apply epilog fixups to a floating-point intrinsic. See if the result can |
| 2117 | /// fold to a constant, or apply fast math flags. |
| 2118 | static Value *simplifyDemandedFPClassResult(Instruction *FPOp, |
| 2119 | FastMathFlags FMF, |
| 2120 | FPClassTest DemandedMask, |
| 2121 | KnownFPClass &Known, |
| 2122 | ArrayRef<KnownFPClass> KnownSrcs) { |
| 2123 | FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses; |
| 2124 | Constant *SingleVal = getFPClassConstant(Ty: FPOp->getType(), Mask: ValidResults, |
| 2125 | /*IsCanonicalizing=*/true); |
| 2126 | if (SingleVal) |
| 2127 | return SingleVal; |
| 2128 | |
| 2129 | FastMathFlags InferredFMF = |
| 2130 | inferFastMathValueFlags(FMF, ValidResults, Known: KnownSrcs); |
| 2131 | if (InferredFMF != FMF) { |
| 2132 | FPOp->dropUBImplyingAttrsAndMetadata(); |
| 2133 | FPOp->setFastMathFlags(InferredFMF); |
| 2134 | return FPOp; |
| 2135 | } |
| 2136 | |
| 2137 | return nullptr; |
| 2138 | } |
| 2139 | |
| 2140 | /// Perform multiple-use aware simplfications for fneg(fabs(\p Src)). Returns a |
| 2141 | /// replacement value if it's simplified, otherwise nullptr. Updates \p Known |
| 2142 | /// with the known fpclass if not simplified. |
| 2143 | static Value *simplifyDemandedFPClassFnegFabs(KnownFPClass &Known, Value *Src, |
| 2144 | FPClassTest DemandedMask, |
| 2145 | KnownFPClass KnownSrc, bool NSZ) { |
| 2146 | if ((DemandedMask & fcNan) == fcNone) |
| 2147 | KnownSrc.knownNot(RuleOut: fcNan); |
| 2148 | if ((DemandedMask & fcInf) == fcNone) |
| 2149 | KnownSrc.knownNot(RuleOut: fcInf); |
| 2150 | |
| 2151 | // If the source value is known negative, we can directly fold to it. |
| 2152 | if (KnownSrc.SignBit == true) |
| 2153 | return Src; |
| 2154 | |
| 2155 | // If the only sign bit difference is for 0, ignore it with nsz. |
| 2156 | if (NSZ && |
| 2157 | KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedGreaterThanZeroMask | fcNan)) |
| 2158 | return Src; |
| 2159 | |
| 2160 | Known = KnownFPClass::fneg(Src: KnownFPClass::fabs(Src: KnownSrc)); |
| 2161 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2162 | return nullptr; |
| 2163 | } |
| 2164 | |
| 2165 | static Value *simplifyDemandedFPClassCopysignMag(Value *MagSrc, |
| 2166 | FPClassTest DemandedMask, |
| 2167 | KnownFPClass KnownSrc, |
| 2168 | bool NSZ) { |
| 2169 | if (NSZ) { |
| 2170 | constexpr FPClassTest NegOrZero = fcNegative | fcPosZero; |
| 2171 | constexpr FPClassTest PosOrZero = fcPositive | fcNegZero; |
| 2172 | |
| 2173 | if ((DemandedMask & ~NegOrZero) == fcNone && |
| 2174 | KnownSrc.isKnownAlways(Mask: NegOrZero)) |
| 2175 | return MagSrc; |
| 2176 | |
| 2177 | if ((DemandedMask & ~PosOrZero) == fcNone && |
| 2178 | KnownSrc.isKnownAlways(Mask: PosOrZero)) |
| 2179 | return MagSrc; |
| 2180 | } else { |
| 2181 | if ((DemandedMask & ~fcNegative) == fcNone && KnownSrc.SignBit == true) |
| 2182 | return MagSrc; |
| 2183 | |
| 2184 | if ((DemandedMask & ~fcPositive) == fcNone && KnownSrc.SignBit == false) |
| 2185 | return MagSrc; |
| 2186 | } |
| 2187 | |
| 2188 | return nullptr; |
| 2189 | } |
| 2190 | |
| 2191 | static Value * |
| 2192 | simplifyDemandedFPClassMinMax(KnownFPClass &Known, Intrinsic::ID IID, |
| 2193 | const CallInst *CI, FPClassTest DemandedMask, |
| 2194 | KnownFPClass KnownLHS, KnownFPClass KnownRHS, |
| 2195 | const Function &F, bool NSZ) { |
| 2196 | bool OrderedZeroSign = !NSZ; |
| 2197 | |
| 2198 | KnownFPClass::MinMaxKind OpKind; |
| 2199 | switch (IID) { |
| 2200 | case Intrinsic::maximum: { |
| 2201 | OpKind = KnownFPClass::MinMaxKind::maximum; |
| 2202 | |
| 2203 | // If one operand is known greater than the other, it must be that |
| 2204 | // operand unless the other is a nan. |
| 2205 | if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses, |
| 2206 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2207 | KnownRHS.isKnownNever(Mask: fcNan)) |
| 2208 | return CI->getArgOperand(i: 0); |
| 2209 | |
| 2210 | if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses, |
| 2211 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2212 | KnownLHS.isKnownNever(Mask: fcNan)) |
| 2213 | return CI->getArgOperand(i: 1); |
| 2214 | |
| 2215 | break; |
| 2216 | } |
| 2217 | case Intrinsic::minimum: { |
| 2218 | OpKind = KnownFPClass::MinMaxKind::minimum; |
| 2219 | |
| 2220 | // If one operand is known less than the other, it must be that operand |
| 2221 | // unless the other is a nan. |
| 2222 | if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses, |
| 2223 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2224 | KnownRHS.isKnownNever(Mask: fcNan)) |
| 2225 | return CI->getArgOperand(i: 0); |
| 2226 | |
| 2227 | if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses, |
| 2228 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2229 | KnownLHS.isKnownNever(Mask: fcNan)) |
| 2230 | return CI->getArgOperand(i: 1); |
| 2231 | |
| 2232 | break; |
| 2233 | } |
| 2234 | case Intrinsic::maxnum: |
| 2235 | case Intrinsic::maximumnum: { |
| 2236 | OpKind = IID == Intrinsic::maxnum ? KnownFPClass::MinMaxKind::maxnum |
| 2237 | : KnownFPClass::MinMaxKind::maximumnum; |
| 2238 | |
| 2239 | if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses, |
| 2240 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2241 | KnownLHS.isKnownNever(Mask: fcNan)) |
| 2242 | return CI->getArgOperand(i: 0); |
| 2243 | |
| 2244 | if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses, |
| 2245 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2246 | KnownRHS.isKnownNever(Mask: fcNan)) |
| 2247 | return CI->getArgOperand(i: 1); |
| 2248 | |
| 2249 | break; |
| 2250 | } |
| 2251 | case Intrinsic::minnum: |
| 2252 | case Intrinsic::minimumnum: { |
| 2253 | OpKind = IID == Intrinsic::minnum ? KnownFPClass::MinMaxKind::minnum |
| 2254 | : KnownFPClass::MinMaxKind::minimumnum; |
| 2255 | |
| 2256 | if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses, |
| 2257 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2258 | KnownLHS.isKnownNever(Mask: fcNan)) |
| 2259 | return CI->getArgOperand(i: 0); |
| 2260 | |
| 2261 | if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses, |
| 2262 | RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) && |
| 2263 | KnownRHS.isKnownNever(Mask: fcNan)) |
| 2264 | return CI->getArgOperand(i: 1); |
| 2265 | |
| 2266 | break; |
| 2267 | } |
| 2268 | default: |
| 2269 | llvm_unreachable("not a min/max intrinsic" ); |
| 2270 | } |
| 2271 | |
| 2272 | Type *EltTy = CI->getType()->getScalarType(); |
| 2273 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2274 | Known = KnownFPClass::minMaxLike(LHS: KnownLHS, RHS: KnownRHS, Kind: OpKind, DenormMode: Mode); |
| 2275 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2276 | |
| 2277 | return getFPClassConstant(Ty: CI->getType(), Mask: Known.KnownFPClasses, |
| 2278 | /*IsCanonicalizing=*/true); |
| 2279 | } |
| 2280 | |
| 2281 | static Value * |
| 2282 | simplifyDemandedUseFPClassFPTrunc(InstCombinerImpl &IC, Instruction &I, |
| 2283 | FastMathFlags FMF, FPClassTest DemandedMask, |
| 2284 | KnownFPClass &Known, unsigned Depth) { |
| 2285 | |
| 2286 | FPClassTest SrcDemandedMask = DemandedMask; |
| 2287 | if (DemandedMask & fcNan) |
| 2288 | SrcDemandedMask |= fcNan; |
| 2289 | |
| 2290 | // Zero results may have been rounded from subnormal or normal sources. |
| 2291 | if (DemandedMask & fcNegZero) |
| 2292 | SrcDemandedMask |= fcNegSubnormal | fcNegNormal; |
| 2293 | if (DemandedMask & fcPosZero) |
| 2294 | SrcDemandedMask |= fcPosSubnormal | fcPosNormal; |
| 2295 | |
| 2296 | // Subnormal results may have been normal in the source type |
| 2297 | if (DemandedMask & fcNegSubnormal) |
| 2298 | SrcDemandedMask |= fcNegNormal; |
| 2299 | if (DemandedMask & fcPosSubnormal) |
| 2300 | SrcDemandedMask |= fcPosNormal; |
| 2301 | |
| 2302 | if (DemandedMask & fcPosInf) |
| 2303 | SrcDemandedMask |= fcPosNormal; |
| 2304 | if (DemandedMask & fcNegInf) |
| 2305 | SrcDemandedMask |= fcNegNormal; |
| 2306 | |
| 2307 | KnownFPClass KnownSrc; |
| 2308 | if (IC.SimplifyDemandedFPClass(I: &I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 2309 | return &I; |
| 2310 | |
| 2311 | Known = KnownFPClass::fptrunc(KnownSrc); |
| 2312 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2313 | |
| 2314 | return simplifyDemandedFPClassResult(FPOp: &I, FMF, DemandedMask, Known, |
| 2315 | KnownSrcs: {KnownSrc}); |
| 2316 | } |
| 2317 | |
| 2318 | Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Instruction *I, |
| 2319 | FPClassTest DemandedMask, |
| 2320 | KnownFPClass &Known, |
| 2321 | Instruction *CxtI, |
| 2322 | unsigned Depth) { |
| 2323 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" ); |
| 2324 | assert(Known == KnownFPClass() && "expected uninitialized state" ); |
| 2325 | |
| 2326 | Type *VTy = I->getType(); |
| 2327 | |
| 2328 | FastMathFlags FMF; |
| 2329 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: I)) { |
| 2330 | FMF = FPOp->getFastMathFlags(); |
| 2331 | DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF); |
| 2332 | } |
| 2333 | |
| 2334 | switch (I->getOpcode()) { |
| 2335 | case Instruction::FNeg: { |
| 2336 | // Special case fneg(fabs(x)) |
| 2337 | |
| 2338 | Value *FNegSrc = I->getOperand(i: 0); |
| 2339 | Value *FNegFAbsSrc; |
| 2340 | if (match(V: FNegSrc, P: m_OneUse(SubPattern: m_FAbs(Op0: m_Value(V&: FNegFAbsSrc))))) { |
| 2341 | KnownFPClass KnownSrc; |
| 2342 | if (SimplifyDemandedFPClass(I: cast<Instruction>(Val: FNegSrc), Op: 0, |
| 2343 | DemandedMask: llvm::unknown_sign(Mask: DemandedMask), Known&: KnownSrc, |
| 2344 | Depth: Depth + 1)) |
| 2345 | return I; |
| 2346 | |
| 2347 | FastMathFlags FabsFMF = cast<FPMathOperator>(Val: FNegSrc)->getFastMathFlags(); |
| 2348 | FPClassTest ThisDemandedMask = |
| 2349 | adjustDemandedMaskFromFlags(DemandedMask, FMF: FabsFMF); |
| 2350 | |
| 2351 | bool IsNSZ = FMF.noSignedZeros() || FabsFMF.noSignedZeros(); |
| 2352 | if (Value *Simplified = simplifyDemandedFPClassFnegFabs( |
| 2353 | Known, Src: FNegFAbsSrc, DemandedMask: ThisDemandedMask, KnownSrc, NSZ: IsNSZ)) |
| 2354 | return Simplified; |
| 2355 | |
| 2356 | if ((ThisDemandedMask & fcNan) == fcNone) |
| 2357 | KnownSrc.knownNot(RuleOut: fcNan); |
| 2358 | if ((ThisDemandedMask & fcInf) == fcNone) |
| 2359 | KnownSrc.knownNot(RuleOut: fcInf); |
| 2360 | |
| 2361 | // fneg(fabs(x)) => fneg(x) |
| 2362 | if (KnownSrc.SignBit == false) |
| 2363 | return replaceOperand(I&: *I, OpNum: 0, V: FNegFAbsSrc); |
| 2364 | |
| 2365 | // fneg(fabs(x)) => fneg(x), ignoring -0 if nsz. |
| 2366 | if (IsNSZ && |
| 2367 | KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedLessThanZeroMask | fcNan)) |
| 2368 | return replaceOperand(I&: *I, OpNum: 0, V: FNegFAbsSrc); |
| 2369 | |
| 2370 | break; |
| 2371 | } |
| 2372 | |
| 2373 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::fneg(Mask: DemandedMask), Known, |
| 2374 | Depth: Depth + 1)) |
| 2375 | return I; |
| 2376 | Known.fneg(); |
| 2377 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2378 | break; |
| 2379 | } |
| 2380 | case Instruction::FAdd: |
| 2381 | case Instruction::FSub: { |
| 2382 | KnownFPClass KnownLHS, KnownRHS; |
| 2383 | |
| 2384 | // fadd x, x can be handled more aggressively. |
| 2385 | if (I->getOperand(i: 0) == I->getOperand(i: 1) && |
| 2386 | I->getOpcode() == Instruction::FAdd && |
| 2387 | isGuaranteedNotToBeUndef(V: I->getOperand(i: 0), AC: SQ.AC, CtxI: CxtI, DT: SQ.DT, |
| 2388 | Depth: Depth + 1)) { |
| 2389 | Type *EltTy = VTy->getScalarType(); |
| 2390 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2391 | |
| 2392 | FPClassTest SrcDemandedMask = DemandedMask; |
| 2393 | if (DemandedMask & fcNan) |
| 2394 | SrcDemandedMask |= fcNan; |
| 2395 | |
| 2396 | // Doubling a subnormal could have resulted in a normal value. |
| 2397 | if (DemandedMask & fcPosNormal) |
| 2398 | SrcDemandedMask |= fcPosSubnormal; |
| 2399 | if (DemandedMask & fcNegNormal) |
| 2400 | SrcDemandedMask |= fcNegSubnormal; |
| 2401 | |
| 2402 | // Doubling a subnormal may produce 0 if FTZ/DAZ. |
| 2403 | if (Mode != DenormalMode::getIEEE()) { |
| 2404 | if (DemandedMask & fcPosZero) { |
| 2405 | SrcDemandedMask |= fcPosSubnormal; |
| 2406 | |
| 2407 | if (Mode.inputsMayBePositiveZero() || Mode.outputsMayBePositiveZero()) |
| 2408 | SrcDemandedMask |= fcNegSubnormal; |
| 2409 | } |
| 2410 | |
| 2411 | if (DemandedMask & fcNegZero) |
| 2412 | SrcDemandedMask |= fcNegSubnormal; |
| 2413 | } |
| 2414 | |
| 2415 | // Doubling a normal could have resulted in an infinity. |
| 2416 | if (DemandedMask & fcPosInf) |
| 2417 | SrcDemandedMask |= fcPosNormal; |
| 2418 | if (DemandedMask & fcNegInf) |
| 2419 | SrcDemandedMask |= fcNegNormal; |
| 2420 | |
| 2421 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 2422 | return I; |
| 2423 | |
| 2424 | Known = KnownFPClass::fadd_self(Src: KnownLHS, Mode); |
| 2425 | KnownRHS = KnownLHS; |
| 2426 | } else { |
| 2427 | FPClassTest SrcDemandedMask = fcFinite; |
| 2428 | |
| 2429 | // inf + (-inf) = nan |
| 2430 | if (DemandedMask & fcNan) |
| 2431 | SrcDemandedMask |= fcNan | fcInf; |
| 2432 | |
| 2433 | if (DemandedMask & fcInf) |
| 2434 | SrcDemandedMask |= fcInf; |
| 2435 | |
| 2436 | if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, Depth: Depth + 1) || |
| 2437 | SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 2438 | return I; |
| 2439 | |
| 2440 | Type *EltTy = VTy->getScalarType(); |
| 2441 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2442 | |
| 2443 | Known = I->getOpcode() == Instruction::FAdd |
| 2444 | ? KnownFPClass::fadd(LHS: KnownLHS, RHS: KnownRHS, Mode) |
| 2445 | : KnownFPClass::fsub(LHS: KnownLHS, RHS: KnownRHS, Mode); |
| 2446 | } |
| 2447 | |
| 2448 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2449 | |
| 2450 | if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 2451 | /*IsCanonicalizing=*/true)) |
| 2452 | return SingleVal; |
| 2453 | |
| 2454 | // Propagate known result to simplify edge case checks. |
| 2455 | bool ResultNotNan = (DemandedMask & fcNan) == fcNone; |
| 2456 | |
| 2457 | // With nnan: X + {+/-}Inf --> {+/-}Inf |
| 2458 | if (ResultNotNan && I->getOpcode() == Instruction::FAdd && |
| 2459 | KnownRHS.isKnownAlways(Mask: fcInf | fcNan) && KnownLHS.isKnownNever(Mask: fcNan)) |
| 2460 | return I->getOperand(i: 1); |
| 2461 | |
| 2462 | // With nnan: {+/-}Inf + X --> {+/-}Inf |
| 2463 | // With nnan: {+/-}Inf - X --> {+/-}Inf |
| 2464 | if (ResultNotNan && KnownLHS.isKnownAlways(Mask: fcInf | fcNan) && |
| 2465 | KnownRHS.isKnownNever(Mask: fcNan)) |
| 2466 | return I->getOperand(i: 0); |
| 2467 | |
| 2468 | FastMathFlags InferredFMF = inferFastMathValueFlags( |
| 2469 | FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS}); |
| 2470 | if (InferredFMF != FMF) { |
| 2471 | I->setFastMathFlags(InferredFMF); |
| 2472 | return I; |
| 2473 | } |
| 2474 | |
| 2475 | return nullptr; |
| 2476 | } |
| 2477 | case Instruction::FMul: { |
| 2478 | KnownFPClass KnownLHS, KnownRHS; |
| 2479 | |
| 2480 | Value *X = I->getOperand(i: 0); |
| 2481 | Value *Y = I->getOperand(i: 1); |
| 2482 | |
| 2483 | FPClassTest SrcDemandedMask = |
| 2484 | DemandedMask & (fcNan | fcZero | fcSubnormal | fcNormal); |
| 2485 | |
| 2486 | if (DemandedMask & fcInf) { |
| 2487 | // mul x, inf = inf |
| 2488 | // mul large_x, large_y = inf |
| 2489 | SrcDemandedMask |= fcSubnormal | fcNormal | fcInf; |
| 2490 | } |
| 2491 | |
| 2492 | if (DemandedMask & fcNan) { |
| 2493 | // mul +/-inf, 0 => nan |
| 2494 | SrcDemandedMask |= fcZero | fcInf | fcNan; |
| 2495 | |
| 2496 | // TODO: Mode check |
| 2497 | // mul +/-inf, sub => nan if daz |
| 2498 | SrcDemandedMask |= fcSubnormal; |
| 2499 | } |
| 2500 | |
| 2501 | // mul normal, subnormal = normal |
| 2502 | // Normal inputs may result in underflow. |
| 2503 | if (DemandedMask & (fcNormal | fcSubnormal)) |
| 2504 | SrcDemandedMask |= fcNormal | fcSubnormal; |
| 2505 | |
| 2506 | if (DemandedMask & fcZero) |
| 2507 | SrcDemandedMask |= fcNormal | fcSubnormal; |
| 2508 | |
| 2509 | if (X == Y && isGuaranteedNotToBeUndef(V: X, AC: SQ.AC, CtxI: CxtI, DT: SQ.DT, Depth: Depth + 1)) { |
| 2510 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 2511 | return I; |
| 2512 | Type *EltTy = VTy->getScalarType(); |
| 2513 | |
| 2514 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2515 | Known = KnownFPClass::square(Src: KnownLHS, Mode); |
| 2516 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2517 | |
| 2518 | if (Constant *Folded = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 2519 | /*IsCanonicalizing=*/true)) |
| 2520 | return Folded; |
| 2521 | |
| 2522 | if (Known.isKnownAlways(Mask: fcPosZero | fcPosInf | fcNan) && |
| 2523 | KnownLHS.isKnownNever(Mask: fcSubnormal | fcNormal)) { |
| 2524 | // We can skip the fabs if the source was already known positive. |
| 2525 | if (KnownLHS.isKnownAlways(Mask: fcPositive)) |
| 2526 | return X; |
| 2527 | |
| 2528 | // => fabs(x), in case this was a -inf or -0. |
| 2529 | // Note: Dropping canonicalize. |
| 2530 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2531 | Builder.SetInsertPoint(I); |
| 2532 | Value *Fabs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: FMF); |
| 2533 | Fabs->takeName(V: I); |
| 2534 | return Fabs; |
| 2535 | } |
| 2536 | |
| 2537 | return nullptr; |
| 2538 | } |
| 2539 | |
| 2540 | if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, Depth: Depth + 1) || |
| 2541 | SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 2542 | return I; |
| 2543 | |
| 2544 | if (FMF.noInfs()) { |
| 2545 | // Flag implies inputs cannot be infinity. |
| 2546 | KnownLHS.knownNot(RuleOut: fcInf); |
| 2547 | KnownRHS.knownNot(RuleOut: fcInf); |
| 2548 | } |
| 2549 | |
| 2550 | bool NonNanResult = (DemandedMask & fcNan) == fcNone; |
| 2551 | |
| 2552 | // With no-nans/no-infs: |
| 2553 | // X * 0.0 --> copysign(0.0, X) |
| 2554 | // X * -0.0 --> copysign(0.0, -X) |
| 2555 | if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) && |
| 2556 | KnownRHS.isKnownAlways(Mask: fcPosZero | fcNan)) { |
| 2557 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2558 | Builder.SetInsertPoint(I); |
| 2559 | |
| 2560 | // => copysign(+0, lhs) |
| 2561 | // Note: Dropping canonicalize |
| 2562 | Value *Copysign = Builder.CreateCopySign(LHS: Y, RHS: X, FMFSource: FMF); |
| 2563 | Copysign->takeName(V: I); |
| 2564 | return Copysign; |
| 2565 | } |
| 2566 | |
| 2567 | if (KnownLHS.isKnownAlways(Mask: fcPosZero | fcNan) && |
| 2568 | (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) { |
| 2569 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2570 | Builder.SetInsertPoint(I); |
| 2571 | |
| 2572 | // => copysign(+0, rhs) |
| 2573 | // Note: Dropping canonicalize |
| 2574 | Value *Copysign = Builder.CreateCopySign(LHS: X, RHS: Y, FMFSource: FMF); |
| 2575 | Copysign->takeName(V: I); |
| 2576 | return Copysign; |
| 2577 | } |
| 2578 | |
| 2579 | if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) && |
| 2580 | KnownRHS.isKnownAlways(Mask: fcNegZero | fcNan)) { |
| 2581 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2582 | Builder.SetInsertPoint(I); |
| 2583 | |
| 2584 | // => copysign(0, fneg(lhs)) |
| 2585 | // Note: Dropping canonicalize |
| 2586 | Value *Copysign = |
| 2587 | Builder.CreateCopySign(LHS: Y, RHS: Builder.CreateFNegFMF(V: X, FMFSource: FMF), FMFSource: FMF); |
| 2588 | Copysign->takeName(V: I); |
| 2589 | return Copysign; |
| 2590 | } |
| 2591 | |
| 2592 | if (KnownLHS.isKnownAlways(Mask: fcNegZero | fcNan) && |
| 2593 | (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) { |
| 2594 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2595 | Builder.SetInsertPoint(I); |
| 2596 | |
| 2597 | // => copysign(+0, fneg(rhs)) |
| 2598 | // Note: Dropping canonicalize |
| 2599 | Value *Copysign = |
| 2600 | Builder.CreateCopySign(LHS: X, RHS: Builder.CreateFNegFMF(V: Y, FMFSource: FMF), FMFSource: FMF); |
| 2601 | Copysign->takeName(V: I); |
| 2602 | return Copysign; |
| 2603 | } |
| 2604 | |
| 2605 | Type *EltTy = VTy->getScalarType(); |
| 2606 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2607 | |
| 2608 | if (KnownLHS.isKnownAlways(Mask: fcInf | fcNan) && |
| 2609 | (KnownRHS.isKnownNeverNaN() && |
| 2610 | KnownRHS.cannotBeOrderedGreaterEqZero(Mode))) { |
| 2611 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2612 | Builder.SetInsertPoint(I); |
| 2613 | |
| 2614 | // Note: Dropping canonicalize |
| 2615 | Value *Neg = Builder.CreateFNegFMF(V: X, FMFSource: FMF); |
| 2616 | Neg->takeName(V: I); |
| 2617 | return Neg; |
| 2618 | } |
| 2619 | |
| 2620 | if (KnownRHS.isKnownAlways(Mask: fcInf | fcNan) && |
| 2621 | (KnownLHS.isKnownNeverNaN() && |
| 2622 | KnownLHS.cannotBeOrderedGreaterEqZero(Mode))) { |
| 2623 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2624 | Builder.SetInsertPoint(I); |
| 2625 | |
| 2626 | // Note: Dropping canonicalize |
| 2627 | Value *Neg = Builder.CreateFNegFMF(V: Y, FMFSource: FMF); |
| 2628 | Neg->takeName(V: I); |
| 2629 | return Neg; |
| 2630 | } |
| 2631 | |
| 2632 | Known = KnownFPClass::fmul(LHS: KnownLHS, RHS: KnownRHS, Mode); |
| 2633 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2634 | |
| 2635 | if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 2636 | /*IsCanonicalizing=*/true)) |
| 2637 | return SingleVal; |
| 2638 | |
| 2639 | FastMathFlags InferredFMF = inferFastMathValueFlags( |
| 2640 | FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS}); |
| 2641 | if (InferredFMF != FMF) { |
| 2642 | I->setFastMathFlags(InferredFMF); |
| 2643 | return I; |
| 2644 | } |
| 2645 | |
| 2646 | return nullptr; |
| 2647 | } |
| 2648 | case Instruction::FDiv: { |
| 2649 | Value *X = I->getOperand(i: 0); |
| 2650 | Value *Y = I->getOperand(i: 1); |
| 2651 | if (X == Y && isGuaranteedNotToBeUndef(V: X, AC: SQ.AC, CtxI: CxtI, DT: SQ.DT, Depth: Depth + 1)) { |
| 2652 | // If the source is 0, inf or nan, the result is a nan |
| 2653 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2654 | Builder.SetInsertPoint(I); |
| 2655 | |
| 2656 | Value *IsZeroOrNan = Builder.CreateFCmpFMF( |
| 2657 | P: FCmpInst::FCMP_UEQ, LHS: I->getOperand(i: 0), RHS: ConstantFP::getZero(Ty: VTy), FMFSource: FMF); |
| 2658 | |
| 2659 | Value *Fabs = |
| 2660 | Builder.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: I->getOperand(i: 0), FMFSource: FMF); |
| 2661 | Value *IsInfOrNan = Builder.CreateFCmpFMF( |
| 2662 | P: FCmpInst::FCMP_UEQ, LHS: Fabs, RHS: ConstantFP::getInfinity(Ty: VTy), FMFSource: FMF); |
| 2663 | |
| 2664 | Value *IsInfOrZeroOrNan = Builder.CreateOr(LHS: IsInfOrNan, RHS: IsZeroOrNan); |
| 2665 | |
| 2666 | return Builder.CreateSelectFMFWithUnknownProfile( |
| 2667 | C: IsInfOrZeroOrNan, True: ConstantFP::getQNaN(Ty: VTy), |
| 2668 | False: ConstantFP::get( |
| 2669 | Ty: VTy, V: APFloat::getOne(Sem: VTy->getScalarType()->getFltSemantics())), |
| 2670 | FMFSource: FMF, DEBUG_TYPE); |
| 2671 | } |
| 2672 | |
| 2673 | Type *EltTy = VTy->getScalarType(); |
| 2674 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2675 | |
| 2676 | // Every output class could require denormal inputs (except for the |
| 2677 | // degenerate case of only-nan results, without DAZ). |
| 2678 | FPClassTest SrcDemandedMask = (DemandedMask & fcNan) | fcSubnormal; |
| 2679 | |
| 2680 | // Normal inputs may result in underflow. |
| 2681 | // x / x = 1.0 for non0/inf/nan |
| 2682 | // -x = +y / -z |
| 2683 | // -x = -y / +z |
| 2684 | if (DemandedMask & (fcSubnormal | fcNormal)) |
| 2685 | SrcDemandedMask |= fcNormal; |
| 2686 | |
| 2687 | if (DemandedMask & fcNan) { |
| 2688 | // 0 / 0 = nan |
| 2689 | // inf / inf = nan |
| 2690 | |
| 2691 | // Subnormal is added in case of DAZ, but this isn't strictly |
| 2692 | // necessary. Every other input class implies a possible subnormal source, |
| 2693 | // so this only could matter in the degenerate case of only-nan results. |
| 2694 | SrcDemandedMask |= fcZero | fcInf | fcNan; |
| 2695 | } |
| 2696 | |
| 2697 | // Zero outputs may be the result of underflow. |
| 2698 | if (DemandedMask & fcZero) |
| 2699 | SrcDemandedMask |= fcNormal | fcSubnormal; |
| 2700 | |
| 2701 | FPClassTest LHSDemandedMask = SrcDemandedMask; |
| 2702 | FPClassTest RHSDemandedMask = SrcDemandedMask; |
| 2703 | |
| 2704 | // 0 / inf = 0 |
| 2705 | if (DemandedMask & fcZero) { |
| 2706 | assert((LHSDemandedMask & fcSubnormal) && |
| 2707 | "should not have to worry about daz here" ); |
| 2708 | LHSDemandedMask |= fcZero; |
| 2709 | RHSDemandedMask |= fcInf; |
| 2710 | } |
| 2711 | |
| 2712 | // x / 0 = inf |
| 2713 | // large_normal / small_normal = inf |
| 2714 | // inf / 1 = inf |
| 2715 | // large_normal / subnormal = inf |
| 2716 | if (DemandedMask & fcInf) { |
| 2717 | LHSDemandedMask |= fcInf | fcNormal | fcSubnormal; |
| 2718 | RHSDemandedMask |= fcZero | fcSubnormal | fcNormal; |
| 2719 | } |
| 2720 | |
| 2721 | KnownFPClass KnownLHS, KnownRHS; |
| 2722 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: LHSDemandedMask, Known&: KnownLHS, Depth: Depth + 1) || |
| 2723 | SimplifyDemandedFPClass(I, Op: 1, DemandedMask: RHSDemandedMask, Known&: KnownRHS, Depth: Depth + 1)) |
| 2724 | return I; |
| 2725 | |
| 2726 | // nsz [+-]0 / x -> 0 |
| 2727 | if (FMF.noSignedZeros() && KnownLHS.isKnownAlways(Mask: fcZero) && |
| 2728 | KnownRHS.isKnownNeverNaN()) |
| 2729 | return ConstantFP::getZero(Ty: VTy); |
| 2730 | |
| 2731 | if (KnownLHS.isKnownAlways(Mask: fcPosZero) && KnownRHS.isKnownNeverNaN()) { |
| 2732 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2733 | Builder.SetInsertPoint(I); |
| 2734 | |
| 2735 | // nnan +0 / x -> copysign(0, rhs) |
| 2736 | // TODO: -0 / x => copysign(0, fneg(rhs)) |
| 2737 | Value *Copysign = Builder.CreateCopySign(LHS: X, RHS: Y, FMFSource: FMF); |
| 2738 | Copysign->takeName(V: I); |
| 2739 | return Copysign; |
| 2740 | } |
| 2741 | |
| 2742 | bool ResultNotNan = (DemandedMask & fcNan) == fcNone; |
| 2743 | bool ResultNotInf = (DemandedMask & fcInf) == fcNone; |
| 2744 | |
| 2745 | if (!ResultNotInf && |
| 2746 | ((ResultNotNan || (KnownLHS.isKnownNeverNaN() && |
| 2747 | KnownLHS.isKnownNeverLogicalZero(Mode))) && |
| 2748 | (KnownRHS.isKnownAlways(Mask: fcPosZero) || |
| 2749 | (FMF.noSignedZeros() && KnownRHS.isKnownAlways(Mask: fcZero))))) { |
| 2750 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 2751 | Builder.SetInsertPoint(I); |
| 2752 | |
| 2753 | // nnan x / 0 => copysign(inf, x); |
| 2754 | // nnan nsz x / -0 => copysign(inf, x); |
| 2755 | Value *Copysign = |
| 2756 | Builder.CreateCopySign(LHS: ConstantFP::getInfinity(Ty: VTy), RHS: X, FMFSource: FMF); |
| 2757 | Copysign->takeName(V: I); |
| 2758 | return Copysign; |
| 2759 | } |
| 2760 | |
| 2761 | // nnan ninf X / [-]0.0 -> poison |
| 2762 | if (ResultNotNan && ResultNotInf && KnownRHS.isKnownAlways(Mask: fcZero)) |
| 2763 | return PoisonValue::get(T: VTy); |
| 2764 | |
| 2765 | Known = KnownFPClass::fdiv(LHS: KnownLHS, RHS: KnownRHS, Mode); |
| 2766 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2767 | |
| 2768 | if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 2769 | /*IsCanonicalizing=*/true)) |
| 2770 | return SingleVal; |
| 2771 | |
| 2772 | FastMathFlags InferredFMF = inferFastMathValueFlags( |
| 2773 | FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS}); |
| 2774 | if (InferredFMF != FMF) { |
| 2775 | I->setFastMathFlags(InferredFMF); |
| 2776 | return I; |
| 2777 | } |
| 2778 | |
| 2779 | return nullptr; |
| 2780 | } |
| 2781 | case Instruction::FPTrunc: |
| 2782 | return simplifyDemandedUseFPClassFPTrunc(IC&: *this, I&: *I, FMF, DemandedMask, |
| 2783 | Known, Depth); |
| 2784 | case Instruction::FPExt: { |
| 2785 | FPClassTest SrcDemandedMask = DemandedMask; |
| 2786 | if (DemandedMask & fcNan) |
| 2787 | SrcDemandedMask |= fcNan; |
| 2788 | |
| 2789 | // No subnormal result does not imply not-subnormal in the source type. |
| 2790 | if ((DemandedMask & fcNegNormal) != fcNone) |
| 2791 | SrcDemandedMask |= fcNegSubnormal; |
| 2792 | if ((DemandedMask & fcPosNormal) != fcNone) |
| 2793 | SrcDemandedMask |= fcPosSubnormal; |
| 2794 | |
| 2795 | KnownFPClass KnownSrc; |
| 2796 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 2797 | return I; |
| 2798 | |
| 2799 | const fltSemantics &DstTy = VTy->getScalarType()->getFltSemantics(); |
| 2800 | const fltSemantics &SrcTy = |
| 2801 | I->getOperand(i: 0)->getType()->getScalarType()->getFltSemantics(); |
| 2802 | |
| 2803 | Known = KnownFPClass::fpext(KnownSrc, DstTy, SrcTy); |
| 2804 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2805 | |
| 2806 | return simplifyDemandedFPClassResult(FPOp: I, FMF, DemandedMask, Known, |
| 2807 | KnownSrcs: {KnownSrc}); |
| 2808 | } |
| 2809 | case Instruction::Call: { |
| 2810 | CallInst *CI = cast<CallInst>(Val: I); |
| 2811 | const Intrinsic::ID IID = CI->getIntrinsicID(); |
| 2812 | switch (IID) { |
| 2813 | case Intrinsic::fabs: { |
| 2814 | KnownFPClass KnownSrc; |
| 2815 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::inverse_fabs(Mask: DemandedMask), |
| 2816 | Known&: KnownSrc, Depth: Depth + 1)) |
| 2817 | return I; |
| 2818 | |
| 2819 | if (Value *Simplified = simplifyDemandedFPClassFabs( |
| 2820 | Known, Src: CI->getArgOperand(i: 0), DemandedMask, KnownSrc, |
| 2821 | NSZ: FMF.noSignedZeros())) |
| 2822 | return Simplified; |
| 2823 | break; |
| 2824 | } |
| 2825 | case Intrinsic::arithmetic_fence: |
| 2826 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Depth: Depth + 1)) |
| 2827 | return I; |
| 2828 | break; |
| 2829 | case Intrinsic::copysign: { |
| 2830 | // Flip on more potentially demanded classes |
| 2831 | const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(Mask: DemandedMask); |
| 2832 | KnownFPClass KnownMag; |
| 2833 | if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: DemandedMaskAnySign, Known&: KnownMag, |
| 2834 | Depth: Depth + 1)) |
| 2835 | return I; |
| 2836 | |
| 2837 | if ((DemandedMask & fcNegative) == DemandedMask) { |
| 2838 | // Roundabout way of replacing with fneg(fabs) |
| 2839 | CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::get(Ty: VTy, V: -1.0)); |
| 2840 | return I; |
| 2841 | } |
| 2842 | |
| 2843 | if ((DemandedMask & fcPositive) == DemandedMask) { |
| 2844 | // Roundabout way of replacing with fabs |
| 2845 | CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::getZero(Ty: VTy)); |
| 2846 | return I; |
| 2847 | } |
| 2848 | |
| 2849 | if (Value *Simplified = simplifyDemandedFPClassCopysignMag( |
| 2850 | MagSrc: CI->getArgOperand(i: 0), DemandedMask, KnownSrc: KnownMag, |
| 2851 | NSZ: FMF.noSignedZeros())) |
| 2852 | return Simplified; |
| 2853 | |
| 2854 | KnownFPClass KnownSign = computeKnownFPClass(Val: CI->getArgOperand(i: 1), |
| 2855 | Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); |
| 2856 | if (KnownMag.SignBit && KnownSign.SignBit && |
| 2857 | *KnownMag.SignBit == *KnownSign.SignBit) |
| 2858 | return CI->getOperand(i_nocapture: 0); |
| 2859 | |
| 2860 | // TODO: Call argument attribute not considered |
| 2861 | // Input implied not-nan from flag. |
| 2862 | if (FMF.noNaNs()) |
| 2863 | KnownSign.knownNot(RuleOut: fcNan); |
| 2864 | |
| 2865 | if (KnownSign.SignBit == false) { |
| 2866 | CI->dropUBImplyingAttrsAndMetadata(); |
| 2867 | CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::getZero(Ty: VTy)); |
| 2868 | return I; |
| 2869 | } |
| 2870 | |
| 2871 | if (KnownSign.SignBit == true) { |
| 2872 | CI->dropUBImplyingAttrsAndMetadata(); |
| 2873 | CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::get(Ty: VTy, V: -1.0)); |
| 2874 | return I; |
| 2875 | } |
| 2876 | |
| 2877 | Known = KnownFPClass::copysign(KnownMag, KnownSign); |
| 2878 | Known.knownNot(RuleOut: ~DemandedMask); |
| 2879 | break; |
| 2880 | } |
| 2881 | case Intrinsic::fma: |
| 2882 | case Intrinsic::fmuladd: { |
| 2883 | // We can't do any simplification on the source besides stripping out |
| 2884 | // unneeded nans. |
| 2885 | FPClassTest SrcDemandedMask = DemandedMask | ~fcNan; |
| 2886 | if (DemandedMask & fcNan) |
| 2887 | SrcDemandedMask |= fcNan; |
| 2888 | |
| 2889 | KnownFPClass KnownSrc[3]; |
| 2890 | |
| 2891 | Type *EltTy = VTy->getScalarType(); |
| 2892 | if (CI->getArgOperand(i: 0) == CI->getArgOperand(i: 1) && |
| 2893 | isGuaranteedNotToBeUndef(V: CI->getArgOperand(i: 0), AC: SQ.AC, CtxI: CxtI, DT: SQ.DT, |
| 2894 | Depth: Depth + 1)) { |
| 2895 | if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc[0], |
| 2896 | Depth: Depth + 1) || |
| 2897 | SimplifyDemandedFPClass(I: CI, Op: 2, DemandedMask: SrcDemandedMask, Known&: KnownSrc[2], |
| 2898 | Depth: Depth + 1)) |
| 2899 | return I; |
| 2900 | |
| 2901 | KnownSrc[1] = KnownSrc[0]; |
| 2902 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2903 | Known = KnownFPClass::fma_square(Squared: KnownSrc[0], Addend: KnownSrc[2], Mode); |
| 2904 | } else { |
| 2905 | for (int OpIdx = 0; OpIdx != 3; ++OpIdx) { |
| 2906 | if (SimplifyDemandedFPClass(I: CI, Op: OpIdx, DemandedMask: SrcDemandedMask, |
| 2907 | Known&: KnownSrc[OpIdx], Depth: Depth + 1)) |
| 2908 | return CI; |
| 2909 | } |
| 2910 | |
| 2911 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2912 | Known = KnownFPClass::fma(LHS: KnownSrc[0], RHS: KnownSrc[1], Addend: KnownSrc[2], Mode); |
| 2913 | } |
| 2914 | |
| 2915 | return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known, |
| 2916 | KnownSrcs: {KnownSrc}); |
| 2917 | } |
| 2918 | case Intrinsic::maximum: |
| 2919 | case Intrinsic::minimum: |
| 2920 | case Intrinsic::maximumnum: |
| 2921 | case Intrinsic::minimumnum: |
| 2922 | case Intrinsic::maxnum: |
| 2923 | case Intrinsic::minnum: { |
| 2924 | const bool PropagateNaN = |
| 2925 | IID == Intrinsic::maximum || IID == Intrinsic::minimum; |
| 2926 | |
| 2927 | // We can't tell much based on the demanded result without inspecting the |
| 2928 | // operands (e.g., a known-positive result could have been clamped), but |
| 2929 | // we can still prune known-nan inputs. |
| 2930 | FPClassTest SrcDemandedMask = |
| 2931 | PropagateNaN && ((DemandedMask & fcNan) == fcNone) |
| 2932 | ? DemandedMask | ~fcNan |
| 2933 | : fcAllFlags; |
| 2934 | |
| 2935 | KnownFPClass KnownLHS, KnownRHS; |
| 2936 | if (SimplifyDemandedFPClass(I: CI, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, |
| 2937 | Depth: Depth + 1) || |
| 2938 | SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 2939 | return I; |
| 2940 | |
| 2941 | Value *Simplified = |
| 2942 | simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask, KnownLHS, |
| 2943 | KnownRHS, F, NSZ: FMF.noSignedZeros()); |
| 2944 | if (Simplified) |
| 2945 | return Simplified; |
| 2946 | |
| 2947 | auto *FPOp = cast<FPMathOperator>(Val: CI); |
| 2948 | |
| 2949 | FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses; |
| 2950 | FastMathFlags InferredFMF = FMF; |
| 2951 | |
| 2952 | if (!FMF.noSignedZeros()) { |
| 2953 | // Add NSZ flag if we know the result will not be sensitive to the sign |
| 2954 | // of 0. |
| 2955 | FPClassTest ZeroMask = fcZero; |
| 2956 | |
| 2957 | Type *EltTy = VTy->getScalarType(); |
| 2958 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 2959 | if (Mode != DenormalMode::getIEEE()) |
| 2960 | ZeroMask |= fcSubnormal; |
| 2961 | |
| 2962 | bool ResultNotLogical0 = (ValidResults & ZeroMask) == fcNone; |
| 2963 | if (ResultNotLogical0 || ((KnownLHS.isKnownNeverLogicalNegZero(Mode) || |
| 2964 | KnownRHS.isKnownNeverLogicalPosZero(Mode)) && |
| 2965 | (KnownLHS.isKnownNeverLogicalPosZero(Mode) || |
| 2966 | KnownRHS.isKnownNeverLogicalNegZero(Mode)))) |
| 2967 | InferredFMF.setNoSignedZeros(true); |
| 2968 | } |
| 2969 | |
| 2970 | if (!FMF.noNaNs() && |
| 2971 | ((PropagateNaN && (ValidResults & fcNan) == fcNone) || |
| 2972 | (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN()))) { |
| 2973 | CI->dropUBImplyingAttrsAndMetadata(); |
| 2974 | InferredFMF.setNoNaNs(true); |
| 2975 | } |
| 2976 | |
| 2977 | if (InferredFMF != FMF) { |
| 2978 | CI->setFastMathFlags(InferredFMF); |
| 2979 | return FPOp; |
| 2980 | } |
| 2981 | |
| 2982 | return nullptr; |
| 2983 | } |
| 2984 | case Intrinsic::exp: |
| 2985 | case Intrinsic::exp2: |
| 2986 | case Intrinsic::exp10: { |
| 2987 | if ((DemandedMask & fcPositive) == fcNone) { |
| 2988 | // Only returns positive values or nans. |
| 2989 | if ((DemandedMask & fcNan) == fcNone) |
| 2990 | return PoisonValue::get(T: VTy); |
| 2991 | |
| 2992 | // Only need nan propagation. |
| 2993 | if ((DemandedMask & ~fcNan) == fcNone) |
| 2994 | return ConstantFP::getQNaN(Ty: VTy); |
| 2995 | |
| 2996 | return CI->getArgOperand(i: 0); |
| 2997 | } |
| 2998 | |
| 2999 | FPClassTest SrcDemandedMask = DemandedMask & fcNan; |
| 3000 | if (DemandedMask & fcNan) |
| 3001 | SrcDemandedMask |= fcNan; |
| 3002 | |
| 3003 | if (DemandedMask & fcZero) { |
| 3004 | // exp(-infinity) = 0 |
| 3005 | SrcDemandedMask |= fcNegInf; |
| 3006 | |
| 3007 | // exp(-largest_normal) = 0 |
| 3008 | // |
| 3009 | // Negative numbers of sufficiently large magnitude underflow to 0. No |
| 3010 | // subnormal input has a 0 result. |
| 3011 | SrcDemandedMask |= fcNegNormal; |
| 3012 | } |
| 3013 | |
| 3014 | if (DemandedMask & fcPosSubnormal) { |
| 3015 | // Negative numbers of sufficiently large magnitude underflow to 0. No |
| 3016 | // subnormal input has a 0 result. |
| 3017 | SrcDemandedMask |= fcNegNormal; |
| 3018 | } |
| 3019 | |
| 3020 | if (DemandedMask & fcPosNormal) { |
| 3021 | // exp(0) = 1 |
| 3022 | // exp(+/- smallest_normal) = 1 |
| 3023 | // exp(+/- largest_denormal) = 1 |
| 3024 | // exp(+/- smallest_denormal) = 1 |
| 3025 | // exp(-1) = pos normal |
| 3026 | SrcDemandedMask |= fcNormal | fcSubnormal | fcZero; |
| 3027 | } |
| 3028 | |
| 3029 | // exp(inf), exp(largest_normal) = inf |
| 3030 | if (DemandedMask & fcPosInf) |
| 3031 | SrcDemandedMask |= fcPosInf | fcPosNormal; |
| 3032 | |
| 3033 | KnownFPClass KnownSrc; |
| 3034 | |
| 3035 | // TODO: This could really make use of KnownFPClass of specific value |
| 3036 | // range, (i.e., close enough to 1) |
| 3037 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3038 | return I; |
| 3039 | |
| 3040 | // exp(+/-0) = 1 |
| 3041 | if (KnownSrc.isKnownAlways(Mask: fcZero)) |
| 3042 | return ConstantFP::get(Ty: VTy, V: 1.0); |
| 3043 | |
| 3044 | // Only perform nan propagation. |
| 3045 | // Note: Dropping canonicalize / quiet of signaling nan. |
| 3046 | if (KnownSrc.isKnownAlways(Mask: fcNan)) |
| 3047 | return CI->getArgOperand(i: 0); |
| 3048 | |
| 3049 | // exp(0 | nan) => x == 0.0 ? 1.0 : x |
| 3050 | if (KnownSrc.isKnownAlways(Mask: fcZero | fcNan)) { |
| 3051 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3052 | Builder.SetInsertPoint(CI); |
| 3053 | |
| 3054 | // fadd +/-0, 1.0 => 1.0 |
| 3055 | // fadd nan, 1.0 => nan |
| 3056 | return Builder.CreateFAddFMF(L: CI->getArgOperand(i: 0), |
| 3057 | R: ConstantFP::get(Ty: VTy, V: 1.0), FMFSource: FMF); |
| 3058 | } |
| 3059 | |
| 3060 | if (KnownSrc.isKnownAlways(Mask: fcInf | fcNan)) { |
| 3061 | // exp(-inf) = 0 |
| 3062 | // exp(+inf) = +inf |
| 3063 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3064 | Builder.SetInsertPoint(CI); |
| 3065 | |
| 3066 | // Note: Dropping canonicalize / quiet of signaling nan. |
| 3067 | Value *X = CI->getArgOperand(i: 0); |
| 3068 | Value *IsPosInfOrNan = Builder.CreateFCmpFMF( |
| 3069 | P: FCmpInst::FCMP_UEQ, LHS: X, RHS: ConstantFP::getInfinity(Ty: VTy), FMFSource: FMF); |
| 3070 | // We do not know whether an infinity or a NaN is more likely here, |
| 3071 | // so mark the branch weights as unkown. |
| 3072 | Value *ZeroOrInf = Builder.CreateSelectFMFWithUnknownProfile( |
| 3073 | C: IsPosInfOrNan, True: X, False: ConstantFP::getZero(Ty: VTy), FMFSource: FMF, DEBUG_TYPE); |
| 3074 | return ZeroOrInf; |
| 3075 | } |
| 3076 | |
| 3077 | Known = KnownFPClass::exp(Src: KnownSrc); |
| 3078 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3079 | |
| 3080 | return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known, |
| 3081 | KnownSrcs: KnownSrc); |
| 3082 | } |
| 3083 | case Intrinsic::log: |
| 3084 | case Intrinsic::log2: |
| 3085 | case Intrinsic::log10: { |
| 3086 | FPClassTest DemandedSrcMask = DemandedMask & (fcNan | fcPosInf); |
| 3087 | if (DemandedMask & fcNan) |
| 3088 | DemandedSrcMask |= fcNan; |
| 3089 | |
| 3090 | Type *EltTy = VTy->getScalarType(); |
| 3091 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 3092 | |
| 3093 | // log(x < 0) = nan |
| 3094 | if (DemandedMask & fcNan) |
| 3095 | DemandedSrcMask |= (fcNegative & ~fcNegZero); |
| 3096 | |
| 3097 | // log(0) = -inf |
| 3098 | if (DemandedMask & fcNegInf) { |
| 3099 | DemandedSrcMask |= fcZero; |
| 3100 | |
| 3101 | // No value produces subnormal result. |
| 3102 | if (Mode.inputsMayBeZero()) |
| 3103 | DemandedSrcMask |= fcSubnormal; |
| 3104 | } |
| 3105 | |
| 3106 | if (DemandedMask & fcNormal) |
| 3107 | DemandedSrcMask |= fcNormal | fcSubnormal; |
| 3108 | |
| 3109 | // log(1) = 0 |
| 3110 | if (DemandedMask & fcZero) |
| 3111 | DemandedSrcMask |= fcPosNormal; |
| 3112 | |
| 3113 | KnownFPClass KnownSrc; |
| 3114 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3115 | return I; |
| 3116 | |
| 3117 | Known = KnownFPClass::log(Src: KnownSrc, Mode); |
| 3118 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3119 | |
| 3120 | return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known, |
| 3121 | KnownSrcs: KnownSrc); |
| 3122 | } |
| 3123 | case Intrinsic::sqrt: { |
| 3124 | FPClassTest DemandedSrcMask = |
| 3125 | DemandedMask & (fcNegZero | fcPositive | fcNan); |
| 3126 | |
| 3127 | if (DemandedMask & fcNan) |
| 3128 | DemandedSrcMask |= fcNan | (fcNegative & ~fcNegZero); |
| 3129 | |
| 3130 | // sqrt(max_subnormal) is a normal value |
| 3131 | if (DemandedMask & fcPosNormal) |
| 3132 | DemandedSrcMask |= fcPosSubnormal; |
| 3133 | |
| 3134 | KnownFPClass KnownSrc; |
| 3135 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3136 | return I; |
| 3137 | |
| 3138 | // Infer the source cannot be negative if the result cannot be nan. |
| 3139 | if ((DemandedMask & fcNan) == fcNone) |
| 3140 | KnownSrc.knownNot(RuleOut: (fcNegative & ~fcNegZero) | fcNan); |
| 3141 | |
| 3142 | // Infer the source cannot be +inf if the result is not +nf |
| 3143 | if ((DemandedMask & fcPosInf) == fcNone) |
| 3144 | KnownSrc.knownNot(RuleOut: fcPosInf); |
| 3145 | |
| 3146 | Type *EltTy = VTy->getScalarType(); |
| 3147 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 3148 | |
| 3149 | // sqrt(-x) = nan, but be careful of negative subnormals flushed to 0. |
| 3150 | if (KnownSrc.isKnownNever(Mask: fcPositive) && |
| 3151 | KnownSrc.isKnownNeverLogicalZero(Mode)) |
| 3152 | return ConstantFP::getQNaN(Ty: VTy); |
| 3153 | |
| 3154 | Known = KnownFPClass::sqrt(Src: KnownSrc, Mode); |
| 3155 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3156 | |
| 3157 | if (Known.KnownFPClasses == fcZero) { |
| 3158 | if (FMF.noSignedZeros()) |
| 3159 | return ConstantFP::getZero(Ty: VTy); |
| 3160 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3161 | Builder.SetInsertPoint(CI); |
| 3162 | |
| 3163 | Value *Copysign = Builder.CreateCopySign(LHS: ConstantFP::getZero(Ty: VTy), |
| 3164 | RHS: CI->getArgOperand(i: 0), FMFSource: FMF); |
| 3165 | Copysign->takeName(V: CI); |
| 3166 | return Copysign; |
| 3167 | } |
| 3168 | |
| 3169 | return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known, |
| 3170 | KnownSrcs: {KnownSrc}); |
| 3171 | } |
| 3172 | case Intrinsic::ldexp: { |
| 3173 | FPClassTest SrcDemandedMask = DemandedMask & fcInf; |
| 3174 | if (DemandedMask & fcNan) |
| 3175 | SrcDemandedMask |= fcNan; |
| 3176 | |
| 3177 | if (DemandedMask & fcPosInf) |
| 3178 | SrcDemandedMask |= fcPosNormal | fcPosSubnormal; |
| 3179 | if (DemandedMask & fcNegInf) |
| 3180 | SrcDemandedMask |= fcNegNormal | fcNegSubnormal; |
| 3181 | |
| 3182 | if (DemandedMask & (fcPosNormal | fcPosSubnormal)) |
| 3183 | SrcDemandedMask |= fcPosNormal | fcPosSubnormal; |
| 3184 | if (DemandedMask & (fcNegNormal | fcNegSubnormal)) |
| 3185 | SrcDemandedMask |= fcNegNormal | fcNegSubnormal; |
| 3186 | |
| 3187 | if (DemandedMask & fcPosZero) |
| 3188 | SrcDemandedMask |= fcPosFinite; |
| 3189 | if (DemandedMask & fcNegZero) |
| 3190 | SrcDemandedMask |= fcNegFinite; |
| 3191 | |
| 3192 | KnownFPClass KnownSrc; |
| 3193 | if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3194 | return CI; |
| 3195 | |
| 3196 | Type *EltTy = VTy->getScalarType(); |
| 3197 | const fltSemantics &FltSem = EltTy->getFltSemantics(); |
| 3198 | DenormalMode Mode = F.getDenormalMode(FPType: FltSem); |
| 3199 | |
| 3200 | KnownBits KnownExpBits = |
| 3201 | ::computeKnownBits(V: CI->getArgOperand(i: 1), Q: SQ, Depth: Depth + 1); |
| 3202 | |
| 3203 | Known = KnownFPClass::ldexp(Src: KnownSrc, N: KnownExpBits, Flt: FltSem, Mode); |
| 3204 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3205 | |
| 3206 | return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known, |
| 3207 | KnownSrcs: {KnownSrc}); |
| 3208 | } |
| 3209 | case Intrinsic::trunc: |
| 3210 | case Intrinsic::floor: |
| 3211 | case Intrinsic::ceil: |
| 3212 | case Intrinsic::rint: |
| 3213 | case Intrinsic::nearbyint: |
| 3214 | case Intrinsic::round: |
| 3215 | case Intrinsic::roundeven: { |
| 3216 | FPClassTest DemandedSrcMask = DemandedMask; |
| 3217 | if (DemandedMask & fcNan) |
| 3218 | DemandedSrcMask |= fcNan; |
| 3219 | |
| 3220 | // Zero results imply valid subnormal sources. |
| 3221 | if (DemandedMask & fcNegZero) |
| 3222 | DemandedSrcMask |= fcNegSubnormal | fcNegNormal; |
| 3223 | |
| 3224 | if (DemandedMask & fcPosZero) |
| 3225 | DemandedSrcMask |= fcPosSubnormal | fcPosNormal; |
| 3226 | |
| 3227 | KnownFPClass KnownSrc; |
| 3228 | if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3229 | return I; |
| 3230 | |
| 3231 | // Note: Possibly dropping snan quiet. |
| 3232 | if (KnownSrc.isKnownAlways(Mask: fcInf | fcNan | fcZero)) |
| 3233 | return CI->getArgOperand(i: 0); |
| 3234 | |
| 3235 | bool IsRoundNearestOrTrunc = |
| 3236 | IID == Intrinsic::round || IID == Intrinsic::roundeven || |
| 3237 | IID == Intrinsic::nearbyint || IID == Intrinsic::rint || |
| 3238 | IID == Intrinsic::trunc; |
| 3239 | |
| 3240 | // Ignore denormals-as-zero, as canonicalization is not mandated. |
| 3241 | if ((IID == Intrinsic::floor || IsRoundNearestOrTrunc) && |
| 3242 | KnownSrc.isKnownAlways(Mask: fcPosZero | fcPosSubnormal)) |
| 3243 | return ConstantFP::getZero(Ty: VTy); |
| 3244 | |
| 3245 | if ((IID == Intrinsic::ceil || IsRoundNearestOrTrunc) && |
| 3246 | KnownSrc.isKnownAlways(Mask: fcNegZero | fcNegSubnormal)) |
| 3247 | return ConstantFP::getZero(Ty: VTy, Negative: true); |
| 3248 | |
| 3249 | if (IID == Intrinsic::floor && KnownSrc.isKnownAlways(Mask: fcNegSubnormal)) |
| 3250 | return ConstantFP::get(Ty: VTy, V: -1.0); |
| 3251 | |
| 3252 | if (IID == Intrinsic::ceil && KnownSrc.isKnownAlways(Mask: fcPosSubnormal)) |
| 3253 | return ConstantFP::get(Ty: VTy, V: 1.0); |
| 3254 | |
| 3255 | Known = KnownFPClass::roundToIntegral( |
| 3256 | Src: KnownSrc, IsTrunc: IID == Intrinsic::trunc, |
| 3257 | IsMultiUnitFPType: VTy->getScalarType()->isMultiUnitFPType()); |
| 3258 | |
| 3259 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3260 | |
| 3261 | if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 3262 | /*IsCanonicalizing=*/true)) |
| 3263 | return SingleVal; |
| 3264 | |
| 3265 | if ((IID == Intrinsic::trunc || IsRoundNearestOrTrunc) && |
| 3266 | KnownSrc.isKnownAlways(Mask: fcZero | fcSubnormal)) { |
| 3267 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3268 | Builder.SetInsertPoint(CI); |
| 3269 | |
| 3270 | Value *Copysign = Builder.CreateCopySign(LHS: ConstantFP::getZero(Ty: VTy), |
| 3271 | RHS: CI->getArgOperand(i: 0)); |
| 3272 | Copysign->takeName(V: CI); |
| 3273 | return Copysign; |
| 3274 | } |
| 3275 | |
| 3276 | FastMathFlags InferredFMF = |
| 3277 | inferFastMathValueFlags(FMF, ValidResults: Known.KnownFPClasses, Known: KnownSrc); |
| 3278 | if (InferredFMF != FMF) { |
| 3279 | CI->dropUBImplyingAttrsAndMetadata(); |
| 3280 | CI->setFastMathFlags(InferredFMF); |
| 3281 | return CI; |
| 3282 | } |
| 3283 | |
| 3284 | return nullptr; |
| 3285 | } |
| 3286 | case Intrinsic::fptrunc_round: |
| 3287 | return simplifyDemandedUseFPClassFPTrunc(IC&: *this, I&: *CI, FMF, DemandedMask, |
| 3288 | Known, Depth); |
| 3289 | case Intrinsic::canonicalize: { |
| 3290 | Type *EltTy = VTy->getScalarType(); |
| 3291 | |
| 3292 | // TODO: This could have more refined support for PositiveZero denormal |
| 3293 | // mode. |
| 3294 | if (EltTy->isIEEELikeFPTy()) { |
| 3295 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 3296 | |
| 3297 | FPClassTest SrcDemandedMask = DemandedMask; |
| 3298 | |
| 3299 | // A demanded quiet nan result may have come from a signaling nan, so we |
| 3300 | // need to expand the demanded mask. |
| 3301 | if ((DemandedMask & fcQNan) != fcNone) |
| 3302 | SrcDemandedMask |= fcSNan; |
| 3303 | |
| 3304 | if (Mode != DenormalMode::getIEEE()) { |
| 3305 | // Any zero results may have come from flushed denormals. |
| 3306 | if (DemandedMask & fcPosZero) |
| 3307 | SrcDemandedMask |= fcPosSubnormal; |
| 3308 | if (DemandedMask & fcNegZero) |
| 3309 | SrcDemandedMask |= fcNegSubnormal; |
| 3310 | } |
| 3311 | |
| 3312 | if (Mode == DenormalMode::getPreserveSign()) { |
| 3313 | // If a denormal input will be flushed, and we don't need zeros, we |
| 3314 | // don't need denormals either. |
| 3315 | if ((DemandedMask & fcPosZero) == fcNone) |
| 3316 | SrcDemandedMask &= ~fcPosSubnormal; |
| 3317 | |
| 3318 | if ((DemandedMask & fcNegZero) == fcNone) |
| 3319 | SrcDemandedMask &= ~fcNegSubnormal; |
| 3320 | } |
| 3321 | |
| 3322 | KnownFPClass KnownSrc; |
| 3323 | |
| 3324 | // Simplify upstream operations before trying to simplify this call. |
| 3325 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Depth: Depth + 1)) |
| 3326 | return I; |
| 3327 | |
| 3328 | // Perform the canonicalization to see if this folded to a constant. |
| 3329 | Known = KnownFPClass::canonicalize(Src: KnownSrc, DenormMode: Mode); |
| 3330 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3331 | |
| 3332 | if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses)) |
| 3333 | return SingleVal; |
| 3334 | |
| 3335 | // For IEEE handling, there is only a bit change for nan inputs, so we |
| 3336 | // can drop it if we do not demand nan results or we know the input |
| 3337 | // isn't a nan. |
| 3338 | // Otherwise, we also need to avoid denormal inputs to drop the |
| 3339 | // canonicalize. |
| 3340 | if (KnownSrc.isKnownNeverNaN() && (Mode == DenormalMode::getIEEE() || |
| 3341 | KnownSrc.isKnownNeverSubnormal())) |
| 3342 | return CI->getArgOperand(i: 0); |
| 3343 | |
| 3344 | FastMathFlags InferredFMF = |
| 3345 | inferFastMathValueFlags(FMF, ValidResults: Known.KnownFPClasses, Known: KnownSrc); |
| 3346 | if (InferredFMF != FMF) { |
| 3347 | CI->dropUBImplyingAttrsAndMetadata(); |
| 3348 | CI->setFastMathFlags(InferredFMF); |
| 3349 | return CI; |
| 3350 | } |
| 3351 | |
| 3352 | return nullptr; |
| 3353 | } |
| 3354 | |
| 3355 | [[fallthrough]]; |
| 3356 | } |
| 3357 | default: |
| 3358 | Known = computeKnownFPClass(Val: I, Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3359 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3360 | break; |
| 3361 | } |
| 3362 | |
| 3363 | break; |
| 3364 | } |
| 3365 | case Instruction::Select: { |
| 3366 | KnownFPClass KnownLHS, KnownRHS; |
| 3367 | if (SimplifyDemandedFPClass(I, Op: 2, DemandedMask, Known&: KnownRHS, Depth: Depth + 1) || |
| 3368 | SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 3369 | return I; |
| 3370 | |
| 3371 | if (KnownLHS.isKnownNever(Mask: DemandedMask)) |
| 3372 | return I->getOperand(i: 2); |
| 3373 | if (KnownRHS.isKnownNever(Mask: DemandedMask)) |
| 3374 | return I->getOperand(i: 1); |
| 3375 | |
| 3376 | adjustKnownFPClassForSelectArm(Known&: KnownLHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1), |
| 3377 | /*Invert=*/false, Q: SQ, Depth); |
| 3378 | adjustKnownFPClassForSelectArm(Known&: KnownRHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2), |
| 3379 | /*Invert=*/true, Q: SQ, Depth); |
| 3380 | Known = KnownLHS.intersectWith(RHS: KnownRHS); |
| 3381 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3382 | break; |
| 3383 | } |
| 3384 | case Instruction::ExtractElement: { |
| 3385 | // TODO: Handle demanded element mask |
| 3386 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Depth: Depth + 1)) |
| 3387 | return I; |
| 3388 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3389 | break; |
| 3390 | } |
| 3391 | case Instruction::InsertElement: { |
| 3392 | KnownFPClass KnownInserted, KnownVec; |
| 3393 | if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownInserted, Depth: Depth + 1) || |
| 3394 | SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownVec, Depth: Depth + 1)) |
| 3395 | return I; |
| 3396 | |
| 3397 | // TODO: Use demanded elements logic from computeKnownFPClass |
| 3398 | Known = KnownVec | KnownInserted; |
| 3399 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3400 | break; |
| 3401 | } |
| 3402 | case Instruction::ShuffleVector: { |
| 3403 | KnownFPClass KnownLHS, KnownRHS; |
| 3404 | if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownRHS, Depth: Depth + 1) || |
| 3405 | SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownLHS, Depth: Depth + 1)) |
| 3406 | return I; |
| 3407 | |
| 3408 | // TODO: This is overly conservative and should consider demanded elements, |
| 3409 | // and splats. |
| 3410 | Known = KnownLHS | KnownRHS; |
| 3411 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3412 | break; |
| 3413 | } |
| 3414 | case Instruction::ExtractValue: { |
| 3415 | Value *; |
| 3416 | if (match(V: I, P: m_ExtractValue<0>(V: m_OneUse(SubPattern: m_Value(V&: ExtractSrc))))) { |
| 3417 | if (auto *II = dyn_cast<IntrinsicInst>(Val: ExtractSrc)) { |
| 3418 | const Intrinsic::ID IID = II->getIntrinsicID(); |
| 3419 | switch (IID) { |
| 3420 | case Intrinsic::frexp: { |
| 3421 | FPClassTest SrcDemandedMask = fcNone; |
| 3422 | if (DemandedMask & fcNan) |
| 3423 | SrcDemandedMask |= fcNan; |
| 3424 | if (DemandedMask & fcNegFinite) |
| 3425 | SrcDemandedMask |= fcNegFinite; |
| 3426 | if (DemandedMask & fcPosFinite) |
| 3427 | SrcDemandedMask |= fcPosFinite; |
| 3428 | if (DemandedMask & fcPosInf) |
| 3429 | SrcDemandedMask |= fcPosInf; |
| 3430 | if (DemandedMask & fcNegInf) |
| 3431 | SrcDemandedMask |= fcNegInf; |
| 3432 | |
| 3433 | KnownFPClass KnownSrc; |
| 3434 | if (SimplifyDemandedFPClass(I: II, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, |
| 3435 | Depth: Depth + 1)) |
| 3436 | return I; |
| 3437 | |
| 3438 | Type *EltTy = VTy->getScalarType(); |
| 3439 | DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics()); |
| 3440 | |
| 3441 | Known = KnownFPClass::frexp_mant(Src: KnownSrc, Mode); |
| 3442 | Known.KnownFPClasses &= DemandedMask; |
| 3443 | |
| 3444 | if (Constant *SingleVal = |
| 3445 | getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses, |
| 3446 | /*IsCanonicalizing=*/true)) |
| 3447 | return SingleVal; |
| 3448 | |
| 3449 | if (Known.isKnownAlways(Mask: fcInf | fcNan)) |
| 3450 | return II->getArgOperand(i: 0); |
| 3451 | |
| 3452 | return nullptr; |
| 3453 | } |
| 3454 | default: |
| 3455 | break; |
| 3456 | } |
| 3457 | } |
| 3458 | } |
| 3459 | [[fallthrough]]; |
| 3460 | } |
| 3461 | default: |
| 3462 | Known = computeKnownFPClass(Val: I, Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3463 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3464 | break; |
| 3465 | } |
| 3466 | |
| 3467 | return getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses); |
| 3468 | } |
| 3469 | |
| 3470 | /// Helper routine of SimplifyDemandedUseFPClass. It computes Known |
| 3471 | /// floating-point classes. It also tries to handle simplifications that can be |
| 3472 | /// done based on DemandedMask, but without modifying the Instruction. |
| 3473 | Value *InstCombinerImpl::SimplifyMultipleUseDemandedFPClass( |
| 3474 | Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known, |
| 3475 | Instruction *CxtI, unsigned Depth) { |
| 3476 | FastMathFlags FMF; |
| 3477 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: I)) { |
| 3478 | FMF = FPOp->getFastMathFlags(); |
| 3479 | DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF); |
| 3480 | } |
| 3481 | |
| 3482 | switch (I->getOpcode()) { |
| 3483 | case Instruction::Select: { |
| 3484 | // TODO: Can we infer which side it came from based on adjusted result |
| 3485 | // class? |
| 3486 | KnownFPClass KnownRHS = |
| 3487 | computeKnownFPClass(Val: I->getOperand(i: 2), Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3488 | if (KnownRHS.isKnownNever(Mask: DemandedMask)) |
| 3489 | return I->getOperand(i: 1); |
| 3490 | |
| 3491 | KnownFPClass KnownLHS = |
| 3492 | computeKnownFPClass(Val: I->getOperand(i: 1), Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3493 | if (KnownLHS.isKnownNever(Mask: DemandedMask)) |
| 3494 | return I->getOperand(i: 2); |
| 3495 | |
| 3496 | adjustKnownFPClassForSelectArm(Known&: KnownLHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1), |
| 3497 | /*Invert=*/false, Q: SQ, Depth); |
| 3498 | adjustKnownFPClassForSelectArm(Known&: KnownRHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2), |
| 3499 | /*Invert=*/true, Q: SQ, Depth); |
| 3500 | Known = KnownLHS.intersectWith(RHS: KnownRHS); |
| 3501 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3502 | break; |
| 3503 | } |
| 3504 | case Instruction::FNeg: { |
| 3505 | // Special case fneg(fabs(x)) |
| 3506 | Value *Src; |
| 3507 | |
| 3508 | Value *FNegSrc = I->getOperand(i: 0); |
| 3509 | if (!match(V: FNegSrc, P: m_FAbs(Op0: m_Value(V&: Src)))) { |
| 3510 | Known = computeKnownFPClass(Val: I, Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3511 | break; |
| 3512 | } |
| 3513 | |
| 3514 | KnownFPClass KnownSrc = |
| 3515 | computeKnownFPClass(Val: Src, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); |
| 3516 | |
| 3517 | FastMathFlags FabsFMF = cast<FPMathOperator>(Val: FNegSrc)->getFastMathFlags(); |
| 3518 | FPClassTest ThisDemandedMask = |
| 3519 | adjustDemandedMaskFromFlags(DemandedMask, FMF: FabsFMF); |
| 3520 | |
| 3521 | // We cannot apply the NSZ logic with multiple uses. We can apply it if the |
| 3522 | // inner fabs has it and this is the only use. |
| 3523 | if (Value *Simplified = simplifyDemandedFPClassFnegFabs( |
| 3524 | Known, Src, DemandedMask: ThisDemandedMask, KnownSrc, /*NSZ=*/false)) |
| 3525 | return Simplified; |
| 3526 | break; |
| 3527 | } |
| 3528 | case Instruction::Call: { |
| 3529 | const CallInst *CI = cast<CallInst>(Val: I); |
| 3530 | const Intrinsic::ID IID = CI->getIntrinsicID(); |
| 3531 | switch (IID) { |
| 3532 | case Intrinsic::fabs: { |
| 3533 | Value *Src = CI->getArgOperand(i: 0); |
| 3534 | KnownFPClass KnownSrc = |
| 3535 | computeKnownFPClass(Val: Src, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); |
| 3536 | |
| 3537 | // NSZ cannot be applied in multiple use case (maybe it could if all uses |
| 3538 | // were known nsz) |
| 3539 | if (Value *Simplified = simplifyDemandedFPClassFabs( |
| 3540 | Known, Src: CI->getArgOperand(i: 0), DemandedMask, KnownSrc, |
| 3541 | /*NSZ=*/false)) |
| 3542 | return Simplified; |
| 3543 | break; |
| 3544 | } |
| 3545 | case Intrinsic::copysign: { |
| 3546 | Value *Mag = CI->getArgOperand(i: 0); |
| 3547 | Value *Sign = CI->getArgOperand(i: 1); |
| 3548 | KnownFPClass KnownMag = |
| 3549 | computeKnownFPClass(Val: Mag, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); |
| 3550 | |
| 3551 | // Rule out some cases by magnitude, which may help prove the sign bit is |
| 3552 | // one direction or the other. |
| 3553 | KnownMag.knownNot(RuleOut: ~llvm::unknown_sign(Mask: DemandedMask)); |
| 3554 | |
| 3555 | // Cannot use nsz in the multiple use case. |
| 3556 | if (Value *Simplified = simplifyDemandedFPClassCopysignMag( |
| 3557 | MagSrc: Mag, DemandedMask, KnownSrc: KnownMag, /*NSZ=*/false)) |
| 3558 | return Simplified; |
| 3559 | |
| 3560 | KnownFPClass KnownSign = |
| 3561 | computeKnownFPClass(Val: Sign, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); |
| 3562 | |
| 3563 | if (FMF.noInfs()) |
| 3564 | KnownSign.knownNot(RuleOut: fcInf); |
| 3565 | if (FMF.noNaNs()) |
| 3566 | KnownSign.knownNot(RuleOut: fcNan); |
| 3567 | |
| 3568 | if (KnownSign.SignBit && KnownMag.SignBit && |
| 3569 | *KnownSign.SignBit == *KnownMag.SignBit) |
| 3570 | return Mag; |
| 3571 | |
| 3572 | Known = KnownFPClass::copysign(KnownMag, KnownSign); |
| 3573 | break; |
| 3574 | } |
| 3575 | case Intrinsic::maxnum: |
| 3576 | case Intrinsic::minnum: |
| 3577 | case Intrinsic::maximum: |
| 3578 | case Intrinsic::minimum: |
| 3579 | case Intrinsic::maximumnum: |
| 3580 | case Intrinsic::minimumnum: { |
| 3581 | KnownFPClass KnownRHS = computeKnownFPClass( |
| 3582 | Val: CI->getArgOperand(i: 1), Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3583 | if (KnownRHS.isUnknown()) |
| 3584 | return nullptr; |
| 3585 | |
| 3586 | KnownFPClass KnownLHS = computeKnownFPClass( |
| 3587 | Val: CI->getArgOperand(i: 0), Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3588 | |
| 3589 | // Cannot use NSZ in the multiple use case. |
| 3590 | return simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask, |
| 3591 | KnownLHS, KnownRHS, F, |
| 3592 | /*NSZ=*/false); |
| 3593 | } |
| 3594 | default: |
| 3595 | break; |
| 3596 | } |
| 3597 | |
| 3598 | [[fallthrough]]; |
| 3599 | } |
| 3600 | default: |
| 3601 | Known = computeKnownFPClass(Val: I, Interested: DemandedMask, CtxI: CxtI, Depth: Depth + 1); |
| 3602 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3603 | break; |
| 3604 | } |
| 3605 | |
| 3606 | return getFPClassConstant(Ty: I->getType(), Mask: Known.KnownFPClasses); |
| 3607 | } |
| 3608 | |
| 3609 | bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo, |
| 3610 | FPClassTest DemandedMask, |
| 3611 | KnownFPClass &Known, |
| 3612 | unsigned Depth) { |
| 3613 | Use &U = I->getOperandUse(i: OpNo); |
| 3614 | Value *V = U.get(); |
| 3615 | Type *VTy = V->getType(); |
| 3616 | |
| 3617 | if (DemandedMask == fcNone) { |
| 3618 | if (isa<PoisonValue>(Val: V)) |
| 3619 | return false; |
| 3620 | replaceUse(U, NewValue: PoisonValue::get(T: VTy)); |
| 3621 | return true; |
| 3622 | } |
| 3623 | |
| 3624 | // Handle constant |
| 3625 | Instruction *VInst = dyn_cast<Instruction>(Val: V); |
| 3626 | if (!VInst) { |
| 3627 | // Handle constants and arguments |
| 3628 | Known = computeKnownFPClass(Val: V, Interested: fcAllFlags, CtxI: I, Depth); |
| 3629 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3630 | |
| 3631 | if (Known.KnownFPClasses == fcNone) { |
| 3632 | if (isa<PoisonValue>(Val: V)) |
| 3633 | return false; |
| 3634 | replaceUse(U, NewValue: PoisonValue::get(T: VTy)); |
| 3635 | return true; |
| 3636 | } |
| 3637 | |
| 3638 | // Do not try to replace values which are already constants (unless we are |
| 3639 | // folding to poison). Doing so could promote poison elements to non-poison |
| 3640 | // constants. |
| 3641 | if (isa<Constant>(Val: V)) |
| 3642 | return false; |
| 3643 | |
| 3644 | Value *FoldedToConst = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses); |
| 3645 | if (!FoldedToConst || FoldedToConst == V) |
| 3646 | return false; |
| 3647 | |
| 3648 | replaceUse(U, NewValue: FoldedToConst); |
| 3649 | return true; |
| 3650 | } |
| 3651 | |
| 3652 | if (const CallBase *CB = dyn_cast<CallBase>(Val: VInst)) { |
| 3653 | FPClassTest NoFPClass = CB->getParamNoFPClass(i: U.getOperandNo()); |
| 3654 | DemandedMask &= ~NoFPClass; |
| 3655 | } |
| 3656 | |
| 3657 | if (Depth == MaxAnalysisRecursionDepth) { |
| 3658 | Known.knownNot(RuleOut: ~DemandedMask); |
| 3659 | return false; |
| 3660 | } |
| 3661 | |
| 3662 | Value *NewVal; |
| 3663 | |
| 3664 | if (VInst->hasOneUse()) { |
| 3665 | // If the instruction has one use, we can directly simplify it. |
| 3666 | NewVal = SimplifyDemandedUseFPClass(I: VInst, DemandedMask, Known, CxtI: I, Depth); |
| 3667 | } else { |
| 3668 | // If there are multiple uses of this instruction, then we can simplify |
| 3669 | // VInst to some other value, but not modify the instruction. |
| 3670 | NewVal = SimplifyMultipleUseDemandedFPClass(I: VInst, DemandedMask, Known, CxtI: I, |
| 3671 | Depth); |
| 3672 | } |
| 3673 | |
| 3674 | if (!NewVal) |
| 3675 | return false; |
| 3676 | if (Instruction *OpInst = dyn_cast<Instruction>(Val&: U)) |
| 3677 | salvageDebugInfo(I&: *OpInst); |
| 3678 | |
| 3679 | replaceUse(U, NewValue: NewVal); |
| 3680 | return true; |
| 3681 | } |
| 3682 | |