| 1 | //===- InstCombineSimplifyDemanded.cpp ------------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This file contains logic for simplifying instructions based on information | 
|---|
| 10 | // about how they are used. | 
|---|
| 11 | // | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 |  | 
|---|
| 14 | #include "InstCombineInternal.h" | 
|---|
| 15 | #include "llvm/Analysis/ValueTracking.h" | 
|---|
| 16 | #include "llvm/IR/GetElementPtrTypeIterator.h" | 
|---|
| 17 | #include "llvm/IR/IntrinsicInst.h" | 
|---|
| 18 | #include "llvm/IR/PatternMatch.h" | 
|---|
| 19 | #include "llvm/Support/KnownBits.h" | 
|---|
| 20 | #include "llvm/Transforms/InstCombine/InstCombiner.h" | 
|---|
| 21 |  | 
|---|
| 22 | using namespace llvm; | 
|---|
| 23 | using namespace llvm::PatternMatch; | 
|---|
| 24 |  | 
|---|
| 25 | #define DEBUG_TYPE "instcombine" | 
|---|
| 26 |  | 
|---|
| 27 | static cl::opt<bool> | 
|---|
| 28 | VerifyKnownBits( "instcombine-verify-known-bits", | 
|---|
| 29 | cl::desc( "Verify that computeKnownBits() and " | 
|---|
| 30 | "SimplifyDemandedBits() are consistent"), | 
|---|
| 31 | cl::Hidden, cl::init(Val: false)); | 
|---|
| 32 |  | 
|---|
| 33 | static cl::opt<unsigned> SimplifyDemandedVectorEltsDepthLimit( | 
|---|
| 34 | "instcombine-simplify-vector-elts-depth", | 
|---|
| 35 | cl::desc( | 
|---|
| 36 | "Depth limit when simplifying vector instructions and their operands"), | 
|---|
| 37 | cl::Hidden, cl::init(Val: 10)); | 
|---|
| 38 |  | 
|---|
| 39 | /// Check to see if the specified operand of the specified instruction is a | 
|---|
| 40 | /// constant integer. If so, check to see if there are any bits set in the | 
|---|
| 41 | /// constant that are not demanded. If so, shrink the constant and return true. | 
|---|
| 42 | static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, | 
|---|
| 43 | const APInt &Demanded) { | 
|---|
| 44 | assert(I && "No instruction?"); | 
|---|
| 45 | assert(OpNo < I->getNumOperands() && "Operand index too large"); | 
|---|
| 46 |  | 
|---|
| 47 | // The operand must be a constant integer or splat integer. | 
|---|
| 48 | Value *Op = I->getOperand(i: OpNo); | 
|---|
| 49 | const APInt *C; | 
|---|
| 50 | if (!match(V: Op, P: m_APInt(Res&: C))) | 
|---|
| 51 | return false; | 
|---|
| 52 |  | 
|---|
| 53 | // If there are no bits set that aren't demanded, nothing to do. | 
|---|
| 54 | if (C->isSubsetOf(RHS: Demanded)) | 
|---|
| 55 | return false; | 
|---|
| 56 |  | 
|---|
| 57 | // This instruction is producing bits that are not demanded. Shrink the RHS. | 
|---|
| 58 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: Op->getType(), V: *C & Demanded)); | 
|---|
| 59 |  | 
|---|
| 60 | return true; | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | /// Returns the bitwidth of the given scalar or pointer type. For vector types, | 
|---|
| 64 | /// returns the element type's bitwidth. | 
|---|
| 65 | static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { | 
|---|
| 66 | if (unsigned BitWidth = Ty->getScalarSizeInBits()) | 
|---|
| 67 | return BitWidth; | 
|---|
| 68 |  | 
|---|
| 69 | return DL.getPointerTypeSizeInBits(Ty); | 
|---|
| 70 | } | 
|---|
| 71 |  | 
|---|
| 72 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if | 
|---|
| 73 | /// the instruction has any properties that allow us to simplify its operands. | 
|---|
| 74 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, | 
|---|
| 75 | KnownBits &Known) { | 
|---|
| 76 | APInt DemandedMask(APInt::getAllOnes(numBits: Known.getBitWidth())); | 
|---|
| 77 | Value *V = SimplifyDemandedUseBits(I: &Inst, DemandedMask, Known, | 
|---|
| 78 | Q: SQ.getWithInstruction(I: &Inst)); | 
|---|
| 79 | if (!V) return false; | 
|---|
| 80 | if (V == &Inst) return true; | 
|---|
| 81 | replaceInstUsesWith(I&: Inst, V); | 
|---|
| 82 | return true; | 
|---|
| 83 | } | 
|---|
| 84 |  | 
|---|
| 85 | /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if | 
|---|
| 86 | /// the instruction has any properties that allow us to simplify its operands. | 
|---|
| 87 | bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) { | 
|---|
| 88 | KnownBits Known(getBitWidth(Ty: Inst.getType(), DL)); | 
|---|
| 89 | return SimplifyDemandedInstructionBits(Inst, Known); | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | /// This form of SimplifyDemandedBits simplifies the specified instruction | 
|---|
| 93 | /// operand if possible, updating it in place. It returns true if it made any | 
|---|
| 94 | /// change and false otherwise. | 
|---|
| 95 | bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo, | 
|---|
| 96 | const APInt &DemandedMask, | 
|---|
| 97 | KnownBits &Known, | 
|---|
| 98 | const SimplifyQuery &Q, | 
|---|
| 99 | unsigned Depth) { | 
|---|
| 100 | Use &U = I->getOperandUse(i: OpNo); | 
|---|
| 101 | Value *V = U.get(); | 
|---|
| 102 | if (isa<Constant>(Val: V)) { | 
|---|
| 103 | llvm::computeKnownBits(V, Known, Q, Depth); | 
|---|
| 104 | return false; | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | Known.resetAll(); | 
|---|
| 108 | if (DemandedMask.isZero()) { | 
|---|
| 109 | // Not demanding any bits from V. | 
|---|
| 110 | replaceUse(U, NewValue: UndefValue::get(T: V->getType())); | 
|---|
| 111 | return true; | 
|---|
| 112 | } | 
|---|
| 113 |  | 
|---|
| 114 | Instruction *VInst = dyn_cast<Instruction>(Val: V); | 
|---|
| 115 | if (!VInst) { | 
|---|
| 116 | llvm::computeKnownBits(V, Known, Q, Depth); | 
|---|
| 117 | return false; | 
|---|
| 118 | } | 
|---|
| 119 |  | 
|---|
| 120 | if (Depth == MaxAnalysisRecursionDepth) | 
|---|
| 121 | return false; | 
|---|
| 122 |  | 
|---|
| 123 | Value *NewVal; | 
|---|
| 124 | if (VInst->hasOneUse()) { | 
|---|
| 125 | // If the instruction has one use, we can directly simplify it. | 
|---|
| 126 | NewVal = SimplifyDemandedUseBits(I: VInst, DemandedMask, Known, Q, Depth); | 
|---|
| 127 | } else { | 
|---|
| 128 | // If there are multiple uses of this instruction, then we can simplify | 
|---|
| 129 | // VInst to some other value, but not modify the instruction. | 
|---|
| 130 | NewVal = | 
|---|
| 131 | SimplifyMultipleUseDemandedBits(I: VInst, DemandedMask, Known, Q, Depth); | 
|---|
| 132 | } | 
|---|
| 133 | if (!NewVal) return false; | 
|---|
| 134 | if (Instruction* OpInst = dyn_cast<Instruction>(Val&: U)) | 
|---|
| 135 | salvageDebugInfo(I&: *OpInst); | 
|---|
| 136 |  | 
|---|
| 137 | replaceUse(U, NewValue: NewVal); | 
|---|
| 138 | return true; | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | /// This function attempts to replace V with a simpler value based on the | 
|---|
| 142 | /// demanded bits. When this function is called, it is known that only the bits | 
|---|
| 143 | /// set in DemandedMask of the result of V are ever used downstream. | 
|---|
| 144 | /// Consequently, depending on the mask and V, it may be possible to replace V | 
|---|
| 145 | /// with a constant or one of its operands. In such cases, this function does | 
|---|
| 146 | /// the replacement and returns true. In all other cases, it returns false after | 
|---|
| 147 | /// analyzing the expression and setting Known.One to the bits known to be one in the | 
|---|
| 148 | /// expression. Known.Zero contains all the bits that are known to be zero in | 
|---|
| 149 | /// the expression. These are provided to potentially allow the caller (which | 
|---|
| 150 | /// might recursively be SimplifyDemandedBits itself) to simplify the | 
|---|
| 151 | /// expression. | 
|---|
| 152 | /// Known.One and Known.Zero always follow the invariant that: | 
|---|
| 153 | ///   Known.One & Known.Zero == 0. | 
|---|
| 154 | /// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero | 
|---|
| 155 | /// are accurate even for bits not in DemandedMask. Note | 
|---|
| 156 | /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all | 
|---|
| 157 | /// be the same. | 
|---|
| 158 | /// | 
|---|
| 159 | /// This returns null if it did not change anything and it permits no | 
|---|
| 160 | /// simplification.  This returns V itself if it did some simplification of V's | 
|---|
| 161 | /// operands based on the information about what bits are demanded. This returns | 
|---|
| 162 | /// some other non-null value if it found out that V is equal to another value | 
|---|
| 163 | /// in the context where the specified bits are demanded, but not for all users. | 
|---|
| 164 | Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, | 
|---|
| 165 | const APInt &DemandedMask, | 
|---|
| 166 | KnownBits &Known, | 
|---|
| 167 | const SimplifyQuery &Q, | 
|---|
| 168 | unsigned Depth) { | 
|---|
| 169 | assert(I != nullptr && "Null pointer of Value???"); | 
|---|
| 170 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); | 
|---|
| 171 | uint32_t BitWidth = DemandedMask.getBitWidth(); | 
|---|
| 172 | Type *VTy = I->getType(); | 
|---|
| 173 | assert( | 
|---|
| 174 | (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && | 
|---|
| 175 | Known.getBitWidth() == BitWidth && | 
|---|
| 176 | "Value *V, DemandedMask and Known must have same BitWidth"); | 
|---|
| 177 |  | 
|---|
| 178 | KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); | 
|---|
| 179 |  | 
|---|
| 180 | // Update flags after simplifying an operand based on the fact that some high | 
|---|
| 181 | // order bits are not demanded. | 
|---|
| 182 | auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I, | 
|---|
| 183 | unsigned NLZ) { | 
|---|
| 184 | if (NLZ > 0) { | 
|---|
| 185 | // Disable the nsw and nuw flags here: We can no longer guarantee that | 
|---|
| 186 | // we won't wrap after simplification. Removing the nsw/nuw flags is | 
|---|
| 187 | // legal here because the top bit is not demanded. | 
|---|
| 188 | I->setHasNoSignedWrap(false); | 
|---|
| 189 | I->setHasNoUnsignedWrap(false); | 
|---|
| 190 | } | 
|---|
| 191 | return I; | 
|---|
| 192 | }; | 
|---|
| 193 |  | 
|---|
| 194 | // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care | 
|---|
| 195 | // about the high bits of the operands. | 
|---|
| 196 | auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) { | 
|---|
| 197 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 198 | // Right fill the mask of bits for the operands to demand the most | 
|---|
| 199 | // significant bit and all those below it. | 
|---|
| 200 | DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); | 
|---|
| 201 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromOps) || | 
|---|
| 202 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOps, Known&: LHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 203 | ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || | 
|---|
| 204 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) { | 
|---|
| 205 | disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); | 
|---|
| 206 | return true; | 
|---|
| 207 | } | 
|---|
| 208 | return false; | 
|---|
| 209 | }; | 
|---|
| 210 |  | 
|---|
| 211 | switch (I->getOpcode()) { | 
|---|
| 212 | default: | 
|---|
| 213 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 214 | break; | 
|---|
| 215 | case Instruction::And: { | 
|---|
| 216 | // If either the LHS or the RHS are Zero, the result is zero. | 
|---|
| 217 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 218 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.Zero, Known&: LHSKnown, Q, | 
|---|
| 219 | Depth: Depth + 1)) | 
|---|
| 220 | return I; | 
|---|
| 221 |  | 
|---|
| 222 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 223 | SQ: Q, Depth); | 
|---|
| 224 |  | 
|---|
| 225 | // If the client is only demanding bits that we know, return the known | 
|---|
| 226 | // constant. | 
|---|
| 227 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 228 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); | 
|---|
| 229 |  | 
|---|
| 230 | // If all of the demanded bits are known 1 on one side, return the other. | 
|---|
| 231 | // These bits cannot contribute to the result of the 'and'. | 
|---|
| 232 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) | 
|---|
| 233 | return I->getOperand(i: 0); | 
|---|
| 234 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) | 
|---|
| 235 | return I->getOperand(i: 1); | 
|---|
| 236 |  | 
|---|
| 237 | // If the RHS is a constant, see if we can simplify it. | 
|---|
| 238 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask & ~LHSKnown.Zero)) | 
|---|
| 239 | return I; | 
|---|
| 240 |  | 
|---|
| 241 | break; | 
|---|
| 242 | } | 
|---|
| 243 | case Instruction::Or: { | 
|---|
| 244 | // If either the LHS or the RHS are One, the result is One. | 
|---|
| 245 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 246 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.One, Known&: LHSKnown, Q, | 
|---|
| 247 | Depth: Depth + 1)) { | 
|---|
| 248 | // Disjoint flag may not longer hold. | 
|---|
| 249 | I->dropPoisonGeneratingFlags(); | 
|---|
| 250 | return I; | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 254 | SQ: Q, Depth); | 
|---|
| 255 |  | 
|---|
| 256 | // If the client is only demanding bits that we know, return the known | 
|---|
| 257 | // constant. | 
|---|
| 258 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 259 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); | 
|---|
| 260 |  | 
|---|
| 261 | // If all of the demanded bits are known zero on one side, return the other. | 
|---|
| 262 | // These bits cannot contribute to the result of the 'or'. | 
|---|
| 263 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) | 
|---|
| 264 | return I->getOperand(i: 0); | 
|---|
| 265 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) | 
|---|
| 266 | return I->getOperand(i: 1); | 
|---|
| 267 |  | 
|---|
| 268 | // If the RHS is a constant, see if we can simplify it. | 
|---|
| 269 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) | 
|---|
| 270 | return I; | 
|---|
| 271 |  | 
|---|
| 272 | // Infer disjoint flag if no common bits are set. | 
|---|
| 273 | if (!cast<PossiblyDisjointInst>(Val: I)->isDisjoint()) { | 
|---|
| 274 | WithCache<const Value *> LHSCache(I->getOperand(i: 0), LHSKnown), | 
|---|
| 275 | RHSCache(I->getOperand(i: 1), RHSKnown); | 
|---|
| 276 | if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ: Q)) { | 
|---|
| 277 | cast<PossiblyDisjointInst>(Val: I)->setIsDisjoint(true); | 
|---|
| 278 | return I; | 
|---|
| 279 | } | 
|---|
| 280 | } | 
|---|
| 281 |  | 
|---|
| 282 | break; | 
|---|
| 283 | } | 
|---|
| 284 | case Instruction::Xor: { | 
|---|
| 285 | if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 286 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 287 | return I; | 
|---|
| 288 | Value *LHS, *RHS; | 
|---|
| 289 | if (DemandedMask == 1 && | 
|---|
| 290 | match(V: I->getOperand(i: 0), P: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: LHS))) && | 
|---|
| 291 | match(V: I->getOperand(i: 1), P: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: RHS)))) { | 
|---|
| 292 | // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 | 
|---|
| 293 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 294 | Builder.SetInsertPoint(I); | 
|---|
| 295 | auto *Xor = Builder.CreateXor(LHS, RHS); | 
|---|
| 296 | return Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Xor); | 
|---|
| 297 | } | 
|---|
| 298 |  | 
|---|
| 299 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 300 | SQ: Q, Depth); | 
|---|
| 301 |  | 
|---|
| 302 | // If the client is only demanding bits that we know, return the known | 
|---|
| 303 | // constant. | 
|---|
| 304 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 305 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); | 
|---|
| 306 |  | 
|---|
| 307 | // If all of the demanded bits are known zero on one side, return the other. | 
|---|
| 308 | // These bits cannot contribute to the result of the 'xor'. | 
|---|
| 309 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 310 | return I->getOperand(i: 0); | 
|---|
| 311 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) | 
|---|
| 312 | return I->getOperand(i: 1); | 
|---|
| 313 |  | 
|---|
| 314 | // If all of the demanded bits are known to be zero on one side or the | 
|---|
| 315 | // other, turn this into an *inclusive* or. | 
|---|
| 316 | //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 | 
|---|
| 317 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.Zero)) { | 
|---|
| 318 | Instruction *Or = | 
|---|
| 319 | BinaryOperator::CreateOr(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1)); | 
|---|
| 320 | if (DemandedMask.isAllOnes()) | 
|---|
| 321 | cast<PossiblyDisjointInst>(Val: Or)->setIsDisjoint(true); | 
|---|
| 322 | Or->takeName(V: I); | 
|---|
| 323 | return InsertNewInstWith(New: Or, Old: I->getIterator()); | 
|---|
| 324 | } | 
|---|
| 325 |  | 
|---|
| 326 | // If all of the demanded bits on one side are known, and all of the set | 
|---|
| 327 | // bits on that side are also known to be set on the other side, turn this | 
|---|
| 328 | // into an AND, as we know the bits will be cleared. | 
|---|
| 329 | //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 | 
|---|
| 330 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero|RHSKnown.One) && | 
|---|
| 331 | RHSKnown.One.isSubsetOf(RHS: LHSKnown.One)) { | 
|---|
| 332 | Constant *AndC = Constant::getIntegerValue(Ty: VTy, | 
|---|
| 333 | V: ~RHSKnown.One & DemandedMask); | 
|---|
| 334 | Instruction *And = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); | 
|---|
| 335 | return InsertNewInstWith(New: And, Old: I->getIterator()); | 
|---|
| 336 | } | 
|---|
| 337 |  | 
|---|
| 338 | // If the RHS is a constant, see if we can change it. Don't alter a -1 | 
|---|
| 339 | // constant because that's a canonical 'not' op, and that is better for | 
|---|
| 340 | // combining, SCEV, and codegen. | 
|---|
| 341 | const APInt *C; | 
|---|
| 342 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && !C->isAllOnes()) { | 
|---|
| 343 | if ((*C | ~DemandedMask).isAllOnes()) { | 
|---|
| 344 | // Force bits to 1 to create a 'not' op. | 
|---|
| 345 | I->setOperand(i: 1, Val: ConstantInt::getAllOnesValue(Ty: VTy)); | 
|---|
| 346 | return I; | 
|---|
| 347 | } | 
|---|
| 348 | // If we can't turn this into a 'not', try to shrink the constant. | 
|---|
| 349 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask)) | 
|---|
| 350 | return I; | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | // If our LHS is an 'and' and if it has one use, and if any of the bits we | 
|---|
| 354 | // are flipping are known to be set, then the xor is just resetting those | 
|---|
| 355 | // bits to zero.  We can just knock out bits from the 'and' and the 'xor', | 
|---|
| 356 | // simplifying both of them. | 
|---|
| 357 | if (Instruction *LHSInst = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) { | 
|---|
| 358 | ConstantInt *AndRHS, *XorRHS; | 
|---|
| 359 | if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && | 
|---|
| 360 | match(V: I->getOperand(i: 1), P: m_ConstantInt(CI&: XorRHS)) && | 
|---|
| 361 | match(V: LHSInst->getOperand(i: 1), P: m_ConstantInt(CI&: AndRHS)) && | 
|---|
| 362 | (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { | 
|---|
| 363 | APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); | 
|---|
| 364 |  | 
|---|
| 365 | Constant *AndC = ConstantInt::get(Ty: VTy, V: NewMask & AndRHS->getValue()); | 
|---|
| 366 | Instruction *NewAnd = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC); | 
|---|
| 367 | InsertNewInstWith(New: NewAnd, Old: I->getIterator()); | 
|---|
| 368 |  | 
|---|
| 369 | Constant *XorC = ConstantInt::get(Ty: VTy, V: NewMask & XorRHS->getValue()); | 
|---|
| 370 | Instruction *NewXor = BinaryOperator::CreateXor(V1: NewAnd, V2: XorC); | 
|---|
| 371 | return InsertNewInstWith(New: NewXor, Old: I->getIterator()); | 
|---|
| 372 | } | 
|---|
| 373 | } | 
|---|
| 374 | break; | 
|---|
| 375 | } | 
|---|
| 376 | case Instruction::Select: { | 
|---|
| 377 | if (SimplifyDemandedBits(I, OpNo: 2, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 378 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 379 | return I; | 
|---|
| 380 |  | 
|---|
| 381 | // If the operands are constants, see if we can simplify them. | 
|---|
| 382 | // This is similar to ShrinkDemandedConstant, but for a select we want to | 
|---|
| 383 | // try to keep the selected constants the same as icmp value constants, if | 
|---|
| 384 | // we can. This helps not break apart (or helps put back together) | 
|---|
| 385 | // canonical patterns like min and max. | 
|---|
| 386 | auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo, | 
|---|
| 387 | const APInt &DemandedMask) { | 
|---|
| 388 | const APInt *SelC; | 
|---|
| 389 | if (!match(V: I->getOperand(i: OpNo), P: m_APInt(Res&: SelC))) | 
|---|
| 390 | return false; | 
|---|
| 391 |  | 
|---|
| 392 | // Get the constant out of the ICmp, if there is one. | 
|---|
| 393 | // Only try this when exactly 1 operand is a constant (if both operands | 
|---|
| 394 | // are constant, the icmp should eventually simplify). Otherwise, we may | 
|---|
| 395 | // invert the transform that reduces set bits and infinite-loop. | 
|---|
| 396 | Value *X; | 
|---|
| 397 | const APInt *CmpC; | 
|---|
| 398 | if (!match(V: I->getOperand(i: 0), P: m_ICmp(L: m_Value(V&: X), R: m_APInt(Res&: CmpC))) || | 
|---|
| 399 | isa<Constant>(Val: X) || CmpC->getBitWidth() != SelC->getBitWidth()) | 
|---|
| 400 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); | 
|---|
| 401 |  | 
|---|
| 402 | // If the constant is already the same as the ICmp, leave it as-is. | 
|---|
| 403 | if (*CmpC == *SelC) | 
|---|
| 404 | return false; | 
|---|
| 405 | // If the constants are not already the same, but can be with the demand | 
|---|
| 406 | // mask, use the constant value from the ICmp. | 
|---|
| 407 | if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) { | 
|---|
| 408 | I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: I->getType(), V: *CmpC)); | 
|---|
| 409 | return true; | 
|---|
| 410 | } | 
|---|
| 411 | return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask); | 
|---|
| 412 | }; | 
|---|
| 413 | if (CanonicalizeSelectConstant(I, 1, DemandedMask) || | 
|---|
| 414 | CanonicalizeSelectConstant(I, 2, DemandedMask)) | 
|---|
| 415 | return I; | 
|---|
| 416 |  | 
|---|
| 417 | // Only known if known in both the LHS and RHS. | 
|---|
| 418 | adjustKnownBitsForSelectArm(Known&: LHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1), | 
|---|
| 419 | /*Invert=*/false, Q, Depth); | 
|---|
| 420 | adjustKnownBitsForSelectArm(Known&: RHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2), | 
|---|
| 421 | /*Invert=*/true, Q, Depth); | 
|---|
| 422 | Known = LHSKnown.intersectWith(RHS: RHSKnown); | 
|---|
| 423 | break; | 
|---|
| 424 | } | 
|---|
| 425 | case Instruction::Trunc: { | 
|---|
| 426 | // If we do not demand the high bits of a right-shifted and truncated value, | 
|---|
| 427 | // then we may be able to truncate it before the shift. | 
|---|
| 428 | Value *X; | 
|---|
| 429 | const APInt *C; | 
|---|
| 430 | if (match(V: I->getOperand(i: 0), P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C))))) { | 
|---|
| 431 | // The shift amount must be valid (not poison) in the narrow type, and | 
|---|
| 432 | // it must not be greater than the high bits demanded of the result. | 
|---|
| 433 | if (C->ult(RHS: VTy->getScalarSizeInBits()) && | 
|---|
| 434 | C->ule(RHS: DemandedMask.countl_zero())) { | 
|---|
| 435 | // trunc (lshr X, C) --> lshr (trunc X), C | 
|---|
| 436 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 437 | Builder.SetInsertPoint(I); | 
|---|
| 438 | Value *Trunc = Builder.CreateTrunc(V: X, DestTy: VTy); | 
|---|
| 439 | return Builder.CreateLShr(LHS: Trunc, RHS: C->getZExtValue()); | 
|---|
| 440 | } | 
|---|
| 441 | } | 
|---|
| 442 | } | 
|---|
| 443 | [[fallthrough]]; | 
|---|
| 444 | case Instruction::ZExt: { | 
|---|
| 445 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); | 
|---|
| 446 |  | 
|---|
| 447 | APInt InputDemandedMask = DemandedMask.zextOrTrunc(width: SrcBitWidth); | 
|---|
| 448 | KnownBits InputKnown(SrcBitWidth); | 
|---|
| 449 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedMask, Known&: InputKnown, Q, | 
|---|
| 450 | Depth: Depth + 1)) { | 
|---|
| 451 | // For zext nneg, we may have dropped the instruction which made the | 
|---|
| 452 | // input non-negative. | 
|---|
| 453 | I->dropPoisonGeneratingFlags(); | 
|---|
| 454 | return I; | 
|---|
| 455 | } | 
|---|
| 456 | assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?"); | 
|---|
| 457 | if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() && | 
|---|
| 458 | !InputKnown.isNegative()) | 
|---|
| 459 | InputKnown.makeNonNegative(); | 
|---|
| 460 | Known = InputKnown.zextOrTrunc(BitWidth); | 
|---|
| 461 |  | 
|---|
| 462 | break; | 
|---|
| 463 | } | 
|---|
| 464 | case Instruction::SExt: { | 
|---|
| 465 | // Compute the bits in the result that are not present in the input. | 
|---|
| 466 | unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits(); | 
|---|
| 467 |  | 
|---|
| 468 | APInt InputDemandedBits = DemandedMask.trunc(width: SrcBitWidth); | 
|---|
| 469 |  | 
|---|
| 470 | // If any of the sign extended bits are demanded, we know that the sign | 
|---|
| 471 | // bit is demanded. | 
|---|
| 472 | if (DemandedMask.getActiveBits() > SrcBitWidth) | 
|---|
| 473 | InputDemandedBits.setBit(SrcBitWidth-1); | 
|---|
| 474 |  | 
|---|
| 475 | KnownBits InputKnown(SrcBitWidth); | 
|---|
| 476 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedBits, Known&: InputKnown, Q, Depth: Depth + 1)) | 
|---|
| 477 | return I; | 
|---|
| 478 |  | 
|---|
| 479 | // If the input sign bit is known zero, or if the NewBits are not demanded | 
|---|
| 480 | // convert this into a zero extension. | 
|---|
| 481 | if (InputKnown.isNonNegative() || | 
|---|
| 482 | DemandedMask.getActiveBits() <= SrcBitWidth) { | 
|---|
| 483 | // Convert to ZExt cast. | 
|---|
| 484 | CastInst *NewCast = new ZExtInst(I->getOperand(i: 0), VTy); | 
|---|
| 485 | NewCast->takeName(V: I); | 
|---|
| 486 | return InsertNewInstWith(New: NewCast, Old: I->getIterator()); | 
|---|
| 487 | } | 
|---|
| 488 |  | 
|---|
| 489 | // If the sign bit of the input is known set or clear, then we know the | 
|---|
| 490 | // top bits of the result. | 
|---|
| 491 | Known = InputKnown.sext(BitWidth); | 
|---|
| 492 | break; | 
|---|
| 493 | } | 
|---|
| 494 | case Instruction::Add: { | 
|---|
| 495 | if ((DemandedMask & 1) == 0) { | 
|---|
| 496 | // If we do not need the low bit, try to convert bool math to logic: | 
|---|
| 497 | // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN | 
|---|
| 498 | Value *X, *Y; | 
|---|
| 499 | if (match(V: I, P: m_c_Add(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))), | 
|---|
| 500 | R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) && | 
|---|
| 501 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) { | 
|---|
| 502 | // Truth table for inputs and output signbits: | 
|---|
| 503 | //       X:0 | X:1 | 
|---|
| 504 | //      ---------- | 
|---|
| 505 | // Y:0  |  0 | 0 | | 
|---|
| 506 | // Y:1  | -1 | 0 | | 
|---|
| 507 | //      ---------- | 
|---|
| 508 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 509 | Builder.SetInsertPoint(I); | 
|---|
| 510 | Value *AndNot = Builder.CreateAnd(LHS: Builder.CreateNot(V: X), RHS: Y); | 
|---|
| 511 | return Builder.CreateSExt(V: AndNot, DestTy: VTy); | 
|---|
| 512 | } | 
|---|
| 513 |  | 
|---|
| 514 | // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN | 
|---|
| 515 | if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: X)), R: m_SExt(Op: m_Value(V&: Y)))) && | 
|---|
| 516 | X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType() && | 
|---|
| 517 | (I->getOperand(i: 0)->hasOneUse() || I->getOperand(i: 1)->hasOneUse())) { | 
|---|
| 518 |  | 
|---|
| 519 | // Truth table for inputs and output signbits: | 
|---|
| 520 | //       X:0 | X:1 | 
|---|
| 521 | //      ----------- | 
|---|
| 522 | // Y:0  | -1 | -1 | | 
|---|
| 523 | // Y:1  | -1 |  0 | | 
|---|
| 524 | //      ----------- | 
|---|
| 525 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 526 | Builder.SetInsertPoint(I); | 
|---|
| 527 | Value *Or = Builder.CreateOr(LHS: X, RHS: Y); | 
|---|
| 528 | return Builder.CreateSExt(V: Or, DestTy: VTy); | 
|---|
| 529 | } | 
|---|
| 530 | } | 
|---|
| 531 |  | 
|---|
| 532 | // Right fill the mask of bits for the operands to demand the most | 
|---|
| 533 | // significant bit and all those below it. | 
|---|
| 534 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 535 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); | 
|---|
| 536 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || | 
|---|
| 537 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 538 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); | 
|---|
| 539 |  | 
|---|
| 540 | // If low order bits are not demanded and known to be zero in one operand, | 
|---|
| 541 | // then we don't need to demand them from the other operand, since they | 
|---|
| 542 | // can't cause overflow into any bits that are demanded in the result. | 
|---|
| 543 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); | 
|---|
| 544 | APInt DemandedFromLHS = DemandedFromOps; | 
|---|
| 545 | DemandedFromLHS.clearLowBits(loBits: NTZ); | 
|---|
| 546 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || | 
|---|
| 547 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 548 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); | 
|---|
| 549 |  | 
|---|
| 550 | // If we are known to be adding zeros to every bit below | 
|---|
| 551 | // the highest demanded bit, we just return the other side. | 
|---|
| 552 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 553 | return I->getOperand(i: 0); | 
|---|
| 554 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) | 
|---|
| 555 | return I->getOperand(i: 1); | 
|---|
| 556 |  | 
|---|
| 557 | // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask | 
|---|
| 558 | { | 
|---|
| 559 | const APInt *C; | 
|---|
| 560 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && | 
|---|
| 561 | C->isOneBitSet(BitNo: DemandedMask.getActiveBits() - 1)) { | 
|---|
| 562 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 563 | Builder.SetInsertPoint(I); | 
|---|
| 564 | return Builder.CreateXor(LHS: I->getOperand(i: 0), RHS: ConstantInt::get(Ty: VTy, V: *C)); | 
|---|
| 565 | } | 
|---|
| 566 | } | 
|---|
| 567 |  | 
|---|
| 568 | // Otherwise just compute the known bits of the result. | 
|---|
| 569 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); | 
|---|
| 570 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); | 
|---|
| 571 | Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); | 
|---|
| 572 | break; | 
|---|
| 573 | } | 
|---|
| 574 | case Instruction::Sub: { | 
|---|
| 575 | // Right fill the mask of bits for the operands to demand the most | 
|---|
| 576 | // significant bit and all those below it. | 
|---|
| 577 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 578 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); | 
|---|
| 579 | if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) || | 
|---|
| 580 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 581 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); | 
|---|
| 582 |  | 
|---|
| 583 | // If low order bits are not demanded and are known to be zero in RHS, | 
|---|
| 584 | // then we don't need to demand them from LHS, since they can't cause a | 
|---|
| 585 | // borrow from any bits that are demanded in the result. | 
|---|
| 586 | unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one(); | 
|---|
| 587 | APInt DemandedFromLHS = DemandedFromOps; | 
|---|
| 588 | DemandedFromLHS.clearLowBits(loBits: NTZ); | 
|---|
| 589 | if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) || | 
|---|
| 590 | SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 591 | return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ); | 
|---|
| 592 |  | 
|---|
| 593 | // If we are known to be subtracting zeros from every bit below | 
|---|
| 594 | // the highest demanded bit, we just return the other side. | 
|---|
| 595 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 596 | return I->getOperand(i: 0); | 
|---|
| 597 | // We can't do this with the LHS for subtraction, unless we are only | 
|---|
| 598 | // demanding the LSB. | 
|---|
| 599 | if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) | 
|---|
| 600 | return I->getOperand(i: 1); | 
|---|
| 601 |  | 
|---|
| 602 | // Canonicalize sub mask, X -> ~X | 
|---|
| 603 | const APInt *LHSC; | 
|---|
| 604 | if (match(V: I->getOperand(i: 0), P: m_LowBitMask(V&: LHSC)) && | 
|---|
| 605 | DemandedFromOps.isSubsetOf(RHS: *LHSC)) { | 
|---|
| 606 | IRBuilderBase::InsertPointGuard Guard(Builder); | 
|---|
| 607 | Builder.SetInsertPoint(I); | 
|---|
| 608 | return Builder.CreateNot(V: I->getOperand(i: 1)); | 
|---|
| 609 | } | 
|---|
| 610 |  | 
|---|
| 611 | // Otherwise just compute the known bits of the result. | 
|---|
| 612 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); | 
|---|
| 613 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); | 
|---|
| 614 | Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); | 
|---|
| 615 | break; | 
|---|
| 616 | } | 
|---|
| 617 | case Instruction::Mul: { | 
|---|
| 618 | APInt DemandedFromOps; | 
|---|
| 619 | if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps)) | 
|---|
| 620 | return I; | 
|---|
| 621 |  | 
|---|
| 622 | if (DemandedMask.isPowerOf2()) { | 
|---|
| 623 | // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. | 
|---|
| 624 | // If we demand exactly one bit N and we have "X * (C' << N)" where C' is | 
|---|
| 625 | // odd (has LSB set), then the left-shifted low bit of X is the answer. | 
|---|
| 626 | unsigned CTZ = DemandedMask.countr_zero(); | 
|---|
| 627 | const APInt *C; | 
|---|
| 628 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && C->countr_zero() == CTZ) { | 
|---|
| 629 | Constant *ShiftC = ConstantInt::get(Ty: VTy, V: CTZ); | 
|---|
| 630 | Instruction *Shl = BinaryOperator::CreateShl(V1: I->getOperand(i: 0), V2: ShiftC); | 
|---|
| 631 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); | 
|---|
| 632 | } | 
|---|
| 633 | } | 
|---|
| 634 | // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: | 
|---|
| 635 | // X * X is odd iff X is odd. | 
|---|
| 636 | // 'Quadratic Reciprocity': X * X -> 0 for bit[1] | 
|---|
| 637 | if (I->getOperand(i: 0) == I->getOperand(i: 1) && DemandedMask.ult(RHS: 4)) { | 
|---|
| 638 | Constant *One = ConstantInt::get(Ty: VTy, V: 1); | 
|---|
| 639 | Instruction *And1 = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: One); | 
|---|
| 640 | return InsertNewInstWith(New: And1, Old: I->getIterator()); | 
|---|
| 641 | } | 
|---|
| 642 |  | 
|---|
| 643 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 644 | break; | 
|---|
| 645 | } | 
|---|
| 646 | case Instruction::Shl: { | 
|---|
| 647 | const APInt *SA; | 
|---|
| 648 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { | 
|---|
| 649 | const APInt *ShrAmt; | 
|---|
| 650 | if (match(V: I->getOperand(i: 0), P: m_Shr(L: m_Value(), R: m_APInt(Res&: ShrAmt)))) | 
|---|
| 651 | if (Instruction *Shr = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) | 
|---|
| 652 | if (Value *R = simplifyShrShlDemandedBits(Shr, ShrOp1: *ShrAmt, Shl: I, ShlOp1: *SA, | 
|---|
| 653 | DemandedMask, Known)) | 
|---|
| 654 | return R; | 
|---|
| 655 |  | 
|---|
| 656 | // Do not simplify if shl is part of funnel-shift pattern | 
|---|
| 657 | if (I->hasOneUse()) { | 
|---|
| 658 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); | 
|---|
| 659 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { | 
|---|
| 660 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { | 
|---|
| 661 | auto [IID, FShiftArgs] = *Opt; | 
|---|
| 662 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && | 
|---|
| 663 | FShiftArgs[0] == FShiftArgs[1]) { | 
|---|
| 664 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 665 | break; | 
|---|
| 666 | } | 
|---|
| 667 | } | 
|---|
| 668 | } | 
|---|
| 669 | } | 
|---|
| 670 |  | 
|---|
| 671 | // If we only want bits that already match the signbit then we don't | 
|---|
| 672 | // need to shift. | 
|---|
| 673 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth - 1); | 
|---|
| 674 | if (DemandedMask.countr_zero() >= ShiftAmt) { | 
|---|
| 675 | if (I->hasNoSignedWrap()) { | 
|---|
| 676 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); | 
|---|
| 677 | unsigned SignBits = | 
|---|
| 678 | ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); | 
|---|
| 679 | if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) | 
|---|
| 680 | return I->getOperand(i: 0); | 
|---|
| 681 | } | 
|---|
| 682 |  | 
|---|
| 683 | // If we can pre-shift a right-shifted constant to the left without | 
|---|
| 684 | // losing any high bits and we don't demand the low bits, then eliminate | 
|---|
| 685 | // the left-shift: | 
|---|
| 686 | // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X | 
|---|
| 687 | Value *X; | 
|---|
| 688 | Constant *C; | 
|---|
| 689 | if (match(V: I->getOperand(i: 0), P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) { | 
|---|
| 690 | Constant *LeftShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); | 
|---|
| 691 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: C, | 
|---|
| 692 | RHS: LeftShiftAmtC, DL); | 
|---|
| 693 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: NewC, | 
|---|
| 694 | RHS: LeftShiftAmtC, DL) == C) { | 
|---|
| 695 | Instruction *Lshr = BinaryOperator::CreateLShr(V1: NewC, V2: X); | 
|---|
| 696 | return InsertNewInstWith(New: Lshr, Old: I->getIterator()); | 
|---|
| 697 | } | 
|---|
| 698 | } | 
|---|
| 699 | } | 
|---|
| 700 |  | 
|---|
| 701 | APInt DemandedMaskIn(DemandedMask.lshr(shiftAmt: ShiftAmt)); | 
|---|
| 702 |  | 
|---|
| 703 | // If the shift is NUW/NSW, then it does demand the high bits. | 
|---|
| 704 | ShlOperator *IOp = cast<ShlOperator>(Val: I); | 
|---|
| 705 | if (IOp->hasNoSignedWrap()) | 
|---|
| 706 | DemandedMaskIn.setHighBits(ShiftAmt+1); | 
|---|
| 707 | else if (IOp->hasNoUnsignedWrap()) | 
|---|
| 708 | DemandedMaskIn.setHighBits(ShiftAmt); | 
|---|
| 709 |  | 
|---|
| 710 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) | 
|---|
| 711 | return I; | 
|---|
| 712 |  | 
|---|
| 713 | Known = KnownBits::shl(LHS: Known, | 
|---|
| 714 | RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), | 
|---|
| 715 | /* NUW */ IOp->hasNoUnsignedWrap(), | 
|---|
| 716 | /* NSW */ IOp->hasNoSignedWrap()); | 
|---|
| 717 | } else { | 
|---|
| 718 | // This is a variable shift, so we can't shift the demand mask by a known | 
|---|
| 719 | // amount. But if we are not demanding high bits, then we are not | 
|---|
| 720 | // demanding those bits from the pre-shifted operand either. | 
|---|
| 721 | if (unsigned CTLZ = DemandedMask.countl_zero()) { | 
|---|
| 722 | APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ)); | 
|---|
| 723 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOp, Known, Q, Depth: Depth + 1)) { | 
|---|
| 724 | // We can't guarantee that nsw/nuw hold after simplifying the operand. | 
|---|
| 725 | I->dropPoisonGeneratingFlags(); | 
|---|
| 726 | return I; | 
|---|
| 727 | } | 
|---|
| 728 | } | 
|---|
| 729 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 730 | } | 
|---|
| 731 | break; | 
|---|
| 732 | } | 
|---|
| 733 | case Instruction::LShr: { | 
|---|
| 734 | const APInt *SA; | 
|---|
| 735 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { | 
|---|
| 736 | uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); | 
|---|
| 737 |  | 
|---|
| 738 | // Do not simplify if lshr is part of funnel-shift pattern | 
|---|
| 739 | if (I->hasOneUse()) { | 
|---|
| 740 | auto *Inst = dyn_cast<Instruction>(Val: I->user_back()); | 
|---|
| 741 | if (Inst && Inst->getOpcode() == BinaryOperator::Or) { | 
|---|
| 742 | if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) { | 
|---|
| 743 | auto [IID, FShiftArgs] = *Opt; | 
|---|
| 744 | if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) && | 
|---|
| 745 | FShiftArgs[0] == FShiftArgs[1]) { | 
|---|
| 746 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 747 | break; | 
|---|
| 748 | } | 
|---|
| 749 | } | 
|---|
| 750 | } | 
|---|
| 751 | } | 
|---|
| 752 |  | 
|---|
| 753 | // If we are just demanding the shifted sign bit and below, then this can | 
|---|
| 754 | // be treated as an ASHR in disguise. | 
|---|
| 755 | if (DemandedMask.countl_zero() >= ShiftAmt) { | 
|---|
| 756 | // If we only want bits that already match the signbit then we don't | 
|---|
| 757 | // need to shift. | 
|---|
| 758 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); | 
|---|
| 759 | unsigned SignBits = | 
|---|
| 760 | ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); | 
|---|
| 761 | if (SignBits >= NumHiDemandedBits) | 
|---|
| 762 | return I->getOperand(i: 0); | 
|---|
| 763 |  | 
|---|
| 764 | // If we can pre-shift a left-shifted constant to the right without | 
|---|
| 765 | // losing any low bits (we already know we don't demand the high bits), | 
|---|
| 766 | // then eliminate the right-shift: | 
|---|
| 767 | // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X | 
|---|
| 768 | Value *X; | 
|---|
| 769 | Constant *C; | 
|---|
| 770 | if (match(V: I->getOperand(i: 0), P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X)))) { | 
|---|
| 771 | Constant *RightShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt); | 
|---|
| 772 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: C, | 
|---|
| 773 | RHS: RightShiftAmtC, DL); | 
|---|
| 774 | if (ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: NewC, | 
|---|
| 775 | RHS: RightShiftAmtC, DL) == C) { | 
|---|
| 776 | Instruction *Shl = BinaryOperator::CreateShl(V1: NewC, V2: X); | 
|---|
| 777 | return InsertNewInstWith(New: Shl, Old: I->getIterator()); | 
|---|
| 778 | } | 
|---|
| 779 | } | 
|---|
| 780 |  | 
|---|
| 781 | const APInt *Factor; | 
|---|
| 782 | if (match(V: I->getOperand(i: 0), | 
|---|
| 783 | P: m_OneUse(SubPattern: m_Mul(L: m_Value(V&: X), R: m_APInt(Res&: Factor)))) && | 
|---|
| 784 | Factor->countr_zero() >= ShiftAmt) { | 
|---|
| 785 | BinaryOperator *Mul = BinaryOperator::CreateMul( | 
|---|
| 786 | V1: X, V2: ConstantInt::get(Ty: X->getType(), V: Factor->lshr(shiftAmt: ShiftAmt))); | 
|---|
| 787 | return InsertNewInstWith(New: Mul, Old: I->getIterator()); | 
|---|
| 788 | } | 
|---|
| 789 | } | 
|---|
| 790 |  | 
|---|
| 791 | // Unsigned shift right. | 
|---|
| 792 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); | 
|---|
| 793 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) { | 
|---|
| 794 | // exact flag may no longer hold. | 
|---|
| 795 | I->dropPoisonGeneratingFlags(); | 
|---|
| 796 | return I; | 
|---|
| 797 | } | 
|---|
| 798 | Known.Zero.lshrInPlace(ShiftAmt); | 
|---|
| 799 | Known.One.lshrInPlace(ShiftAmt); | 
|---|
| 800 | if (ShiftAmt) | 
|---|
| 801 | Known.Zero.setHighBits(ShiftAmt);  // high bits known zero. | 
|---|
| 802 | } else { | 
|---|
| 803 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 804 | } | 
|---|
| 805 | break; | 
|---|
| 806 | } | 
|---|
| 807 | case Instruction::AShr: { | 
|---|
| 808 | unsigned SignBits = ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1); | 
|---|
| 809 |  | 
|---|
| 810 | // If we only want bits that already match the signbit then we don't need | 
|---|
| 811 | // to shift. | 
|---|
| 812 | unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); | 
|---|
| 813 | if (SignBits >= NumHiDemandedBits) | 
|---|
| 814 | return I->getOperand(i: 0); | 
|---|
| 815 |  | 
|---|
| 816 | // If this is an arithmetic shift right and only the low-bit is set, we can | 
|---|
| 817 | // always convert this into a logical shr, even if the shift amount is | 
|---|
| 818 | // variable.  The low bit of the shift cannot be an input sign bit unless | 
|---|
| 819 | // the shift amount is >= the size of the datatype, which is undefined. | 
|---|
| 820 | if (DemandedMask.isOne()) { | 
|---|
| 821 | // Perform the logical shift right. | 
|---|
| 822 | Instruction *NewVal = BinaryOperator::CreateLShr( | 
|---|
| 823 | V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), Name: I->getName()); | 
|---|
| 824 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); | 
|---|
| 825 | } | 
|---|
| 826 |  | 
|---|
| 827 | const APInt *SA; | 
|---|
| 828 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { | 
|---|
| 829 | uint32_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1); | 
|---|
| 830 |  | 
|---|
| 831 | // Signed shift right. | 
|---|
| 832 | APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt)); | 
|---|
| 833 | // If any of the bits being shifted in are demanded, then we should set | 
|---|
| 834 | // the sign bit as demanded. | 
|---|
| 835 | bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt; | 
|---|
| 836 | if (ShiftedInBitsDemanded) | 
|---|
| 837 | DemandedMaskIn.setSignBit(); | 
|---|
| 838 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) { | 
|---|
| 839 | // exact flag may no longer hold. | 
|---|
| 840 | I->dropPoisonGeneratingFlags(); | 
|---|
| 841 | return I; | 
|---|
| 842 | } | 
|---|
| 843 |  | 
|---|
| 844 | // If the input sign bit is known to be zero, or if none of the shifted in | 
|---|
| 845 | // bits are demanded, turn this into an unsigned shift right. | 
|---|
| 846 | if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) { | 
|---|
| 847 | BinaryOperator *LShr = BinaryOperator::CreateLShr(V1: I->getOperand(i: 0), | 
|---|
| 848 | V2: I->getOperand(i: 1)); | 
|---|
| 849 | LShr->setIsExact(cast<BinaryOperator>(Val: I)->isExact()); | 
|---|
| 850 | LShr->takeName(V: I); | 
|---|
| 851 | return InsertNewInstWith(New: LShr, Old: I->getIterator()); | 
|---|
| 852 | } | 
|---|
| 853 |  | 
|---|
| 854 | Known = KnownBits::ashr( | 
|---|
| 855 | LHS: Known, RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)), | 
|---|
| 856 | ShAmtNonZero: ShiftAmt != 0, Exact: I->isExact()); | 
|---|
| 857 | } else { | 
|---|
| 858 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 859 | } | 
|---|
| 860 | break; | 
|---|
| 861 | } | 
|---|
| 862 | case Instruction::UDiv: { | 
|---|
| 863 | // UDiv doesn't demand low bits that are zero in the divisor. | 
|---|
| 864 | const APInt *SA; | 
|---|
| 865 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) { | 
|---|
| 866 | // TODO: Take the demanded mask of the result into account. | 
|---|
| 867 | unsigned RHSTrailingZeros = SA->countr_zero(); | 
|---|
| 868 | APInt DemandedMaskIn = | 
|---|
| 869 | APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - RHSTrailingZeros); | 
|---|
| 870 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known&: LHSKnown, Q, Depth: Depth + 1)) { | 
|---|
| 871 | // We can't guarantee that "exact" is still true after changing the | 
|---|
| 872 | // dividend. | 
|---|
| 873 | I->dropPoisonGeneratingFlags(); | 
|---|
| 874 | return I; | 
|---|
| 875 | } | 
|---|
| 876 |  | 
|---|
| 877 | Known = KnownBits::udiv(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *SA), | 
|---|
| 878 | Exact: cast<BinaryOperator>(Val: I)->isExact()); | 
|---|
| 879 | } else { | 
|---|
| 880 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 881 | } | 
|---|
| 882 | break; | 
|---|
| 883 | } | 
|---|
| 884 | case Instruction::SRem: { | 
|---|
| 885 | const APInt *Rem; | 
|---|
| 886 | if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Rem)) && Rem->isPowerOf2()) { | 
|---|
| 887 | if (DemandedMask.ult(RHS: *Rem)) // srem won't affect demanded bits | 
|---|
| 888 | return I->getOperand(i: 0); | 
|---|
| 889 |  | 
|---|
| 890 | APInt LowBits = *Rem - 1; | 
|---|
| 891 | APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); | 
|---|
| 892 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: Mask2, Known&: LHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 893 | return I; | 
|---|
| 894 | Known = KnownBits::srem(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *Rem)); | 
|---|
| 895 | break; | 
|---|
| 896 | } | 
|---|
| 897 |  | 
|---|
| 898 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 899 | break; | 
|---|
| 900 | } | 
|---|
| 901 | case Instruction::Call: { | 
|---|
| 902 | bool KnownBitsComputed = false; | 
|---|
| 903 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { | 
|---|
| 904 | switch (II->getIntrinsicID()) { | 
|---|
| 905 | case Intrinsic::abs: { | 
|---|
| 906 | if (DemandedMask == 1) | 
|---|
| 907 | return II->getArgOperand(i: 0); | 
|---|
| 908 | break; | 
|---|
| 909 | } | 
|---|
| 910 | case Intrinsic::ctpop: { | 
|---|
| 911 | // Checking if the number of clear bits is odd (parity)? If the type has | 
|---|
| 912 | // an even number of bits, that's the same as checking if the number of | 
|---|
| 913 | // set bits is odd, so we can eliminate the 'not' op. | 
|---|
| 914 | Value *X; | 
|---|
| 915 | if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 && | 
|---|
| 916 | match(V: II->getArgOperand(i: 0), P: m_Not(V: m_Value(V&: X)))) { | 
|---|
| 917 | Function *Ctpop = Intrinsic::getOrInsertDeclaration( | 
|---|
| 918 | M: II->getModule(), id: Intrinsic::ctpop, Tys: VTy); | 
|---|
| 919 | return InsertNewInstWith(New: CallInst::Create(Func: Ctpop, Args: {X}), Old: I->getIterator()); | 
|---|
| 920 | } | 
|---|
| 921 | break; | 
|---|
| 922 | } | 
|---|
| 923 | case Intrinsic::bswap: { | 
|---|
| 924 | // If the only bits demanded come from one byte of the bswap result, | 
|---|
| 925 | // just shift the input byte into position to eliminate the bswap. | 
|---|
| 926 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 927 | unsigned NTZ = DemandedMask.countr_zero(); | 
|---|
| 928 |  | 
|---|
| 929 | // Round NTZ down to the next byte.  If we have 11 trailing zeros, then | 
|---|
| 930 | // we need all the bits down to bit 8.  Likewise, round NLZ.  If we | 
|---|
| 931 | // have 14 leading zeros, round to 8. | 
|---|
| 932 | NLZ = alignDown(Value: NLZ, Align: 8); | 
|---|
| 933 | NTZ = alignDown(Value: NTZ, Align: 8); | 
|---|
| 934 | // If we need exactly one byte, we can do this transformation. | 
|---|
| 935 | if (BitWidth - NLZ - NTZ == 8) { | 
|---|
| 936 | // Replace this with either a left or right shift to get the byte into | 
|---|
| 937 | // the right place. | 
|---|
| 938 | Instruction *NewVal; | 
|---|
| 939 | if (NLZ > NTZ) | 
|---|
| 940 | NewVal = BinaryOperator::CreateLShr( | 
|---|
| 941 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NLZ - NTZ)); | 
|---|
| 942 | else | 
|---|
| 943 | NewVal = BinaryOperator::CreateShl( | 
|---|
| 944 | V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NTZ - NLZ)); | 
|---|
| 945 | NewVal->takeName(V: I); | 
|---|
| 946 | return InsertNewInstWith(New: NewVal, Old: I->getIterator()); | 
|---|
| 947 | } | 
|---|
| 948 | break; | 
|---|
| 949 | } | 
|---|
| 950 | case Intrinsic::ptrmask: { | 
|---|
| 951 | unsigned MaskWidth = I->getOperand(i: 1)->getType()->getScalarSizeInBits(); | 
|---|
| 952 | RHSKnown = KnownBits(MaskWidth); | 
|---|
| 953 | // If either the LHS or the RHS are Zero, the result is zero. | 
|---|
| 954 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1) || | 
|---|
| 955 | SimplifyDemandedBits( | 
|---|
| 956 | I, OpNo: 1, DemandedMask: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth), | 
|---|
| 957 | Known&: RHSKnown, Q, Depth: Depth + 1)) | 
|---|
| 958 | return I; | 
|---|
| 959 |  | 
|---|
| 960 | // TODO: Should be 1-extend | 
|---|
| 961 | RHSKnown = RHSKnown.anyextOrTrunc(BitWidth); | 
|---|
| 962 |  | 
|---|
| 963 | Known = LHSKnown & RHSKnown; | 
|---|
| 964 | KnownBitsComputed = true; | 
|---|
| 965 |  | 
|---|
| 966 | // If the client is only demanding bits we know to be zero, return | 
|---|
| 967 | // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer | 
|---|
| 968 | // provenance, but making the mask zero will be easily optimizable in | 
|---|
| 969 | // the backend. | 
|---|
| 970 | if (DemandedMask.isSubsetOf(RHS: Known.Zero) && | 
|---|
| 971 | !match(V: I->getOperand(i: 1), P: m_Zero())) | 
|---|
| 972 | return replaceOperand( | 
|---|
| 973 | I&: *I, OpNum: 1, V: Constant::getNullValue(Ty: I->getOperand(i: 1)->getType())); | 
|---|
| 974 |  | 
|---|
| 975 | // Mask in demanded space does nothing. | 
|---|
| 976 | // NOTE: We may have attributes associated with the return value of the | 
|---|
| 977 | // llvm.ptrmask intrinsic that will be lost when we just return the | 
|---|
| 978 | // operand. We should try to preserve them. | 
|---|
| 979 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) | 
|---|
| 980 | return I->getOperand(i: 0); | 
|---|
| 981 |  | 
|---|
| 982 | // If the RHS is a constant, see if we can simplify it. | 
|---|
| 983 | if (ShrinkDemandedConstant( | 
|---|
| 984 | I, OpNo: 1, Demanded: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth))) | 
|---|
| 985 | return I; | 
|---|
| 986 |  | 
|---|
| 987 | // Combine: | 
|---|
| 988 | // (ptrmask (getelementptr i8, ptr p, imm i), imm mask) | 
|---|
| 989 | //   -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask) | 
|---|
| 990 | // where only the low bits known to be zero in the pointer are changed | 
|---|
| 991 | Value *InnerPtr; | 
|---|
| 992 | uint64_t GEPIndex; | 
|---|
| 993 | uint64_t PtrMaskImmediate; | 
|---|
| 994 | if (match(V: I, P: m_Intrinsic<Intrinsic::ptrmask>( | 
|---|
| 995 | Op0: m_PtrAdd(PointerOp: m_Value(V&: InnerPtr), OffsetOp: m_ConstantInt(V&: GEPIndex)), | 
|---|
| 996 | Op1: m_ConstantInt(V&: PtrMaskImmediate)))) { | 
|---|
| 997 |  | 
|---|
| 998 | LHSKnown = computeKnownBits(V: InnerPtr, CxtI: I, Depth: Depth + 1); | 
|---|
| 999 | if (!LHSKnown.isZero()) { | 
|---|
| 1000 | const unsigned trailingZeros = LHSKnown.countMinTrailingZeros(); | 
|---|
| 1001 | uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1; | 
|---|
| 1002 |  | 
|---|
| 1003 | uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits; | 
|---|
| 1004 | uint64_t MaskedLowBitsGEPIndex = | 
|---|
| 1005 | GEPIndex & PointerAlignBits & PtrMaskImmediate; | 
|---|
| 1006 |  | 
|---|
| 1007 | uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; | 
|---|
| 1008 |  | 
|---|
| 1009 | if (MaskedGEPIndex != GEPIndex) { | 
|---|
| 1010 | auto *GEP = cast<GEPOperator>(Val: II->getArgOperand(i: 0)); | 
|---|
| 1011 | Builder.SetInsertPoint(I); | 
|---|
| 1012 | Type *GEPIndexType = | 
|---|
| 1013 | DL.getIndexType(PtrTy: GEP->getPointerOperand()->getType()); | 
|---|
| 1014 | Value *MaskedGEP = Builder.CreateGEP( | 
|---|
| 1015 | Ty: GEP->getSourceElementType(), Ptr: InnerPtr, | 
|---|
| 1016 | IdxList: ConstantInt::get(Ty: GEPIndexType, V: MaskedGEPIndex), | 
|---|
| 1017 | Name: GEP->getName(), NW: GEP->isInBounds()); | 
|---|
| 1018 |  | 
|---|
| 1019 | replaceOperand(I&: *I, OpNum: 0, V: MaskedGEP); | 
|---|
| 1020 | return I; | 
|---|
| 1021 | } | 
|---|
| 1022 | } | 
|---|
| 1023 | } | 
|---|
| 1024 |  | 
|---|
| 1025 | break; | 
|---|
| 1026 | } | 
|---|
| 1027 |  | 
|---|
| 1028 | case Intrinsic::fshr: | 
|---|
| 1029 | case Intrinsic::fshl: { | 
|---|
| 1030 | const APInt *SA; | 
|---|
| 1031 | if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA))) | 
|---|
| 1032 | break; | 
|---|
| 1033 |  | 
|---|
| 1034 | // Normalize to funnel shift left. APInt shifts of BitWidth are well- | 
|---|
| 1035 | // defined, so no need to special-case zero shifts here. | 
|---|
| 1036 | uint64_t ShiftAmt = SA->urem(RHS: BitWidth); | 
|---|
| 1037 | if (II->getIntrinsicID() == Intrinsic::fshr) | 
|---|
| 1038 | ShiftAmt = BitWidth - ShiftAmt; | 
|---|
| 1039 |  | 
|---|
| 1040 | APInt DemandedMaskLHS(DemandedMask.lshr(shiftAmt: ShiftAmt)); | 
|---|
| 1041 | APInt DemandedMaskRHS(DemandedMask.shl(shiftAmt: BitWidth - ShiftAmt)); | 
|---|
| 1042 | if (I->getOperand(i: 0) != I->getOperand(i: 1)) { | 
|---|
| 1043 | if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskLHS, Known&: LHSKnown, Q, | 
|---|
| 1044 | Depth: Depth + 1) || | 
|---|
| 1045 | SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedMaskRHS, Known&: RHSKnown, Q, | 
|---|
| 1046 | Depth: Depth + 1)) { | 
|---|
| 1047 | // Range attribute may no longer hold. | 
|---|
| 1048 | I->dropPoisonGeneratingReturnAttributes(); | 
|---|
| 1049 | return I; | 
|---|
| 1050 | } | 
|---|
| 1051 | } else { // fshl is a rotate | 
|---|
| 1052 | // Avoid converting rotate into funnel shift. | 
|---|
| 1053 | // Only simplify if one operand is constant. | 
|---|
| 1054 | LHSKnown = computeKnownBits(V: I->getOperand(i: 0), CxtI: I, Depth: Depth + 1); | 
|---|
| 1055 | if (DemandedMaskLHS.isSubsetOf(RHS: LHSKnown.Zero | LHSKnown.One) && | 
|---|
| 1056 | !match(V: I->getOperand(i: 0), P: m_SpecificInt(V: LHSKnown.One))) { | 
|---|
| 1057 | replaceOperand(I&: *I, OpNum: 0, V: Constant::getIntegerValue(Ty: VTy, V: LHSKnown.One)); | 
|---|
| 1058 | return I; | 
|---|
| 1059 | } | 
|---|
| 1060 |  | 
|---|
| 1061 | RHSKnown = computeKnownBits(V: I->getOperand(i: 1), CxtI: I, Depth: Depth + 1); | 
|---|
| 1062 | if (DemandedMaskRHS.isSubsetOf(RHS: RHSKnown.Zero | RHSKnown.One) && | 
|---|
| 1063 | !match(V: I->getOperand(i: 1), P: m_SpecificInt(V: RHSKnown.One))) { | 
|---|
| 1064 | replaceOperand(I&: *I, OpNum: 1, V: Constant::getIntegerValue(Ty: VTy, V: RHSKnown.One)); | 
|---|
| 1065 | return I; | 
|---|
| 1066 | } | 
|---|
| 1067 | } | 
|---|
| 1068 |  | 
|---|
| 1069 | Known.Zero = LHSKnown.Zero.shl(shiftAmt: ShiftAmt) | | 
|---|
| 1070 | RHSKnown.Zero.lshr(shiftAmt: BitWidth - ShiftAmt); | 
|---|
| 1071 | Known.One = LHSKnown.One.shl(shiftAmt: ShiftAmt) | | 
|---|
| 1072 | RHSKnown.One.lshr(shiftAmt: BitWidth - ShiftAmt); | 
|---|
| 1073 | KnownBitsComputed = true; | 
|---|
| 1074 | break; | 
|---|
| 1075 | } | 
|---|
| 1076 | case Intrinsic::umax: { | 
|---|
| 1077 | // UMax(A, C) == A if ... | 
|---|
| 1078 | // The lowest non-zero bit of DemandedMask is higher than the highest | 
|---|
| 1079 | // non-zero bit of C. | 
|---|
| 1080 | const APInt *C; | 
|---|
| 1081 | unsigned CTZ = DemandedMask.countr_zero(); | 
|---|
| 1082 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && | 
|---|
| 1083 | CTZ >= C->getActiveBits()) | 
|---|
| 1084 | return II->getArgOperand(i: 0); | 
|---|
| 1085 | break; | 
|---|
| 1086 | } | 
|---|
| 1087 | case Intrinsic::umin: { | 
|---|
| 1088 | // UMin(A, C) == A if ... | 
|---|
| 1089 | // The lowest non-zero bit of DemandedMask is higher than the highest | 
|---|
| 1090 | // non-one bit of C. | 
|---|
| 1091 | // This comes from using DeMorgans on the above umax example. | 
|---|
| 1092 | const APInt *C; | 
|---|
| 1093 | unsigned CTZ = DemandedMask.countr_zero(); | 
|---|
| 1094 | if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) && | 
|---|
| 1095 | CTZ >= C->getBitWidth() - C->countl_one()) | 
|---|
| 1096 | return II->getArgOperand(i: 0); | 
|---|
| 1097 | break; | 
|---|
| 1098 | } | 
|---|
| 1099 | default: { | 
|---|
| 1100 | // Handle target specific intrinsics | 
|---|
| 1101 | std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic( | 
|---|
| 1102 | II&: *II, DemandedMask, Known, KnownBitsComputed); | 
|---|
| 1103 | if (V) | 
|---|
| 1104 | return *V; | 
|---|
| 1105 | break; | 
|---|
| 1106 | } | 
|---|
| 1107 | } | 
|---|
| 1108 | } | 
|---|
| 1109 |  | 
|---|
| 1110 | if (!KnownBitsComputed) | 
|---|
| 1111 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 1112 | break; | 
|---|
| 1113 | } | 
|---|
| 1114 | } | 
|---|
| 1115 |  | 
|---|
| 1116 | if (I->getType()->isPointerTy()) { | 
|---|
| 1117 | Align Alignment = I->getPointerAlignment(DL); | 
|---|
| 1118 | Known.Zero.setLowBits(Log2(A: Alignment)); | 
|---|
| 1119 | } | 
|---|
| 1120 |  | 
|---|
| 1121 | // If the client is only demanding bits that we know, return the known | 
|---|
| 1122 | // constant. We can't directly simplify pointers as a constant because of | 
|---|
| 1123 | // pointer provenance. | 
|---|
| 1124 | // TODO: We could return `(inttoptr const)` for pointers. | 
|---|
| 1125 | if (!I->getType()->isPointerTy() && | 
|---|
| 1126 | DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 1127 | return Constant::getIntegerValue(Ty: VTy, V: Known.One); | 
|---|
| 1128 |  | 
|---|
| 1129 | if (VerifyKnownBits) { | 
|---|
| 1130 | KnownBits ReferenceKnown = llvm::computeKnownBits(V: I, Q, Depth); | 
|---|
| 1131 | if (Known != ReferenceKnown) { | 
|---|
| 1132 | errs() << "Mismatched known bits for "<< *I << " in " | 
|---|
| 1133 | << I->getFunction()->getName() << "\n"; | 
|---|
| 1134 | errs() << "computeKnownBits(): "<< ReferenceKnown << "\n"; | 
|---|
| 1135 | errs() << "SimplifyDemandedBits(): "<< Known << "\n"; | 
|---|
| 1136 | std::abort(); | 
|---|
| 1137 | } | 
|---|
| 1138 | } | 
|---|
| 1139 |  | 
|---|
| 1140 | return nullptr; | 
|---|
| 1141 | } | 
|---|
| 1142 |  | 
|---|
| 1143 | /// Helper routine of SimplifyDemandedUseBits. It computes Known | 
|---|
| 1144 | /// bits. It also tries to handle simplifications that can be done based on | 
|---|
| 1145 | /// DemandedMask, but without modifying the Instruction. | 
|---|
| 1146 | Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits( | 
|---|
| 1147 | Instruction *I, const APInt &DemandedMask, KnownBits &Known, | 
|---|
| 1148 | const SimplifyQuery &Q, unsigned Depth) { | 
|---|
| 1149 | unsigned BitWidth = DemandedMask.getBitWidth(); | 
|---|
| 1150 | Type *ITy = I->getType(); | 
|---|
| 1151 |  | 
|---|
| 1152 | KnownBits LHSKnown(BitWidth); | 
|---|
| 1153 | KnownBits RHSKnown(BitWidth); | 
|---|
| 1154 |  | 
|---|
| 1155 | // Despite the fact that we can't simplify this instruction in all User's | 
|---|
| 1156 | // context, we can at least compute the known bits, and we can | 
|---|
| 1157 | // do simplifications that apply to *just* the one user if we know that | 
|---|
| 1158 | // this instruction has a simpler value in that context. | 
|---|
| 1159 | switch (I->getOpcode()) { | 
|---|
| 1160 | case Instruction::And: { | 
|---|
| 1161 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1162 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1163 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 1164 | SQ: Q, Depth); | 
|---|
| 1165 | computeKnownBitsFromContext(V: I, Known, Q, Depth); | 
|---|
| 1166 |  | 
|---|
| 1167 | // If the client is only demanding bits that we know, return the known | 
|---|
| 1168 | // constant. | 
|---|
| 1169 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 1170 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); | 
|---|
| 1171 |  | 
|---|
| 1172 | // If all of the demanded bits are known 1 on one side, return the other. | 
|---|
| 1173 | // These bits cannot contribute to the result of the 'and' in this context. | 
|---|
| 1174 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One)) | 
|---|
| 1175 | return I->getOperand(i: 0); | 
|---|
| 1176 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One)) | 
|---|
| 1177 | return I->getOperand(i: 1); | 
|---|
| 1178 |  | 
|---|
| 1179 | break; | 
|---|
| 1180 | } | 
|---|
| 1181 | case Instruction::Or: { | 
|---|
| 1182 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1183 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1184 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 1185 | SQ: Q, Depth); | 
|---|
| 1186 | computeKnownBitsFromContext(V: I, Known, Q, Depth); | 
|---|
| 1187 |  | 
|---|
| 1188 | // If the client is only demanding bits that we know, return the known | 
|---|
| 1189 | // constant. | 
|---|
| 1190 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 1191 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); | 
|---|
| 1192 |  | 
|---|
| 1193 | // We can simplify (X|Y) -> X or Y in the user's context if we know that | 
|---|
| 1194 | // only bits from X or Y are demanded. | 
|---|
| 1195 | // If all of the demanded bits are known zero on one side, return the other. | 
|---|
| 1196 | // These bits cannot contribute to the result of the 'or' in this context. | 
|---|
| 1197 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero)) | 
|---|
| 1198 | return I->getOperand(i: 0); | 
|---|
| 1199 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero)) | 
|---|
| 1200 | return I->getOperand(i: 1); | 
|---|
| 1201 |  | 
|---|
| 1202 | break; | 
|---|
| 1203 | } | 
|---|
| 1204 | case Instruction::Xor: { | 
|---|
| 1205 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1206 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1207 | Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown, | 
|---|
| 1208 | SQ: Q, Depth); | 
|---|
| 1209 | computeKnownBitsFromContext(V: I, Known, Q, Depth); | 
|---|
| 1210 |  | 
|---|
| 1211 | // If the client is only demanding bits that we know, return the known | 
|---|
| 1212 | // constant. | 
|---|
| 1213 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 1214 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); | 
|---|
| 1215 |  | 
|---|
| 1216 | // We can simplify (X^Y) -> X or Y in the user's context if we know that | 
|---|
| 1217 | // only bits from X or Y are demanded. | 
|---|
| 1218 | // If all of the demanded bits are known zero on one side, return the other. | 
|---|
| 1219 | if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 1220 | return I->getOperand(i: 0); | 
|---|
| 1221 | if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero)) | 
|---|
| 1222 | return I->getOperand(i: 1); | 
|---|
| 1223 |  | 
|---|
| 1224 | break; | 
|---|
| 1225 | } | 
|---|
| 1226 | case Instruction::Add: { | 
|---|
| 1227 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 1228 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); | 
|---|
| 1229 |  | 
|---|
| 1230 | // If an operand adds zeros to every bit below the highest demanded bit, | 
|---|
| 1231 | // that operand doesn't change the result. Return the other side. | 
|---|
| 1232 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1233 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 1234 | return I->getOperand(i: 0); | 
|---|
| 1235 |  | 
|---|
| 1236 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1237 | if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero)) | 
|---|
| 1238 | return I->getOperand(i: 1); | 
|---|
| 1239 |  | 
|---|
| 1240 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); | 
|---|
| 1241 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); | 
|---|
| 1242 | Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); | 
|---|
| 1243 | computeKnownBitsFromContext(V: I, Known, Q, Depth); | 
|---|
| 1244 | break; | 
|---|
| 1245 | } | 
|---|
| 1246 | case Instruction::Sub: { | 
|---|
| 1247 | unsigned NLZ = DemandedMask.countl_zero(); | 
|---|
| 1248 | APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ); | 
|---|
| 1249 |  | 
|---|
| 1250 | // If an operand subtracts zeros from every bit below the highest demanded | 
|---|
| 1251 | // bit, that operand doesn't change the result. Return the other side. | 
|---|
| 1252 | llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1253 | if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero)) | 
|---|
| 1254 | return I->getOperand(i: 0); | 
|---|
| 1255 |  | 
|---|
| 1256 | bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap(); | 
|---|
| 1257 | bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap(); | 
|---|
| 1258 | llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1); | 
|---|
| 1259 | Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW); | 
|---|
| 1260 | computeKnownBitsFromContext(V: I, Known, Q, Depth); | 
|---|
| 1261 | break; | 
|---|
| 1262 | } | 
|---|
| 1263 | case Instruction::AShr: { | 
|---|
| 1264 | // Compute the Known bits to simplify things downstream. | 
|---|
| 1265 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 1266 |  | 
|---|
| 1267 | // If this user is only demanding bits that we know, return the known | 
|---|
| 1268 | // constant. | 
|---|
| 1269 | if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One)) | 
|---|
| 1270 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); | 
|---|
| 1271 |  | 
|---|
| 1272 | // If the right shift operand 0 is a result of a left shift by the same | 
|---|
| 1273 | // amount, this is probably a zero/sign extension, which may be unnecessary, | 
|---|
| 1274 | // if we do not demand any of the new sign bits. So, return the original | 
|---|
| 1275 | // operand instead. | 
|---|
| 1276 | const APInt *ShiftRC; | 
|---|
| 1277 | const APInt *ShiftLC; | 
|---|
| 1278 | Value *X; | 
|---|
| 1279 | unsigned BitWidth = DemandedMask.getBitWidth(); | 
|---|
| 1280 | if (match(V: I, | 
|---|
| 1281 | P: m_AShr(L: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: ShiftLC)), R: m_APInt(Res&: ShiftRC))) && | 
|---|
| 1282 | ShiftLC == ShiftRC && ShiftLC->ult(RHS: BitWidth) && | 
|---|
| 1283 | DemandedMask.isSubsetOf(RHS: APInt::getLowBitsSet( | 
|---|
| 1284 | numBits: BitWidth, loBitsSet: BitWidth - ShiftRC->getZExtValue()))) { | 
|---|
| 1285 | return X; | 
|---|
| 1286 | } | 
|---|
| 1287 |  | 
|---|
| 1288 | break; | 
|---|
| 1289 | } | 
|---|
| 1290 | default: | 
|---|
| 1291 | // Compute the Known bits to simplify things downstream. | 
|---|
| 1292 | llvm::computeKnownBits(V: I, Known, Q, Depth); | 
|---|
| 1293 |  | 
|---|
| 1294 | // If this user is only demanding bits that we know, return the known | 
|---|
| 1295 | // constant. | 
|---|
| 1296 | if (DemandedMask.isSubsetOf(RHS: Known.Zero|Known.One)) | 
|---|
| 1297 | return Constant::getIntegerValue(Ty: ITy, V: Known.One); | 
|---|
| 1298 |  | 
|---|
| 1299 | break; | 
|---|
| 1300 | } | 
|---|
| 1301 |  | 
|---|
| 1302 | return nullptr; | 
|---|
| 1303 | } | 
|---|
| 1304 |  | 
|---|
| 1305 | /// Helper routine of SimplifyDemandedUseBits. It tries to simplify | 
|---|
| 1306 | /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into | 
|---|
| 1307 | /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign | 
|---|
| 1308 | /// of "C2-C1". | 
|---|
| 1309 | /// | 
|---|
| 1310 | /// Suppose E1 and E2 are generally different in bits S={bm, bm+1, | 
|---|
| 1311 | /// ..., bn}, without considering the specific value X is holding. | 
|---|
| 1312 | /// This transformation is legal iff one of following conditions is hold: | 
|---|
| 1313 | ///  1) All the bit in S are 0, in this case E1 == E2. | 
|---|
| 1314 | ///  2) We don't care those bits in S, per the input DemandedMask. | 
|---|
| 1315 | ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the | 
|---|
| 1316 | ///     rest bits. | 
|---|
| 1317 | /// | 
|---|
| 1318 | /// Currently we only test condition 2). | 
|---|
| 1319 | /// | 
|---|
| 1320 | /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was | 
|---|
| 1321 | /// not successful. | 
|---|
| 1322 | Value *InstCombinerImpl::simplifyShrShlDemandedBits( | 
|---|
| 1323 | Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, | 
|---|
| 1324 | const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) { | 
|---|
| 1325 | if (!ShlOp1 || !ShrOp1) | 
|---|
| 1326 | return nullptr; // No-op. | 
|---|
| 1327 |  | 
|---|
| 1328 | Value *VarX = Shr->getOperand(i: 0); | 
|---|
| 1329 | Type *Ty = VarX->getType(); | 
|---|
| 1330 | unsigned BitWidth = Ty->getScalarSizeInBits(); | 
|---|
| 1331 | if (ShlOp1.uge(RHS: BitWidth) || ShrOp1.uge(RHS: BitWidth)) | 
|---|
| 1332 | return nullptr; // Undef. | 
|---|
| 1333 |  | 
|---|
| 1334 | unsigned ShlAmt = ShlOp1.getZExtValue(); | 
|---|
| 1335 | unsigned ShrAmt = ShrOp1.getZExtValue(); | 
|---|
| 1336 |  | 
|---|
| 1337 | Known.One.clearAllBits(); | 
|---|
| 1338 | Known.Zero.setLowBits(ShlAmt - 1); | 
|---|
| 1339 | Known.Zero &= DemandedMask; | 
|---|
| 1340 |  | 
|---|
| 1341 | APInt BitMask1(APInt::getAllOnes(numBits: BitWidth)); | 
|---|
| 1342 | APInt BitMask2(APInt::getAllOnes(numBits: BitWidth)); | 
|---|
| 1343 |  | 
|---|
| 1344 | bool isLshr = (Shr->getOpcode() == Instruction::LShr); | 
|---|
| 1345 | BitMask1 = isLshr ? (BitMask1.lshr(shiftAmt: ShrAmt) << ShlAmt) : | 
|---|
| 1346 | (BitMask1.ashr(ShiftAmt: ShrAmt) << ShlAmt); | 
|---|
| 1347 |  | 
|---|
| 1348 | if (ShrAmt <= ShlAmt) { | 
|---|
| 1349 | BitMask2 <<= (ShlAmt - ShrAmt); | 
|---|
| 1350 | } else { | 
|---|
| 1351 | BitMask2 = isLshr ? BitMask2.lshr(shiftAmt: ShrAmt - ShlAmt): | 
|---|
| 1352 | BitMask2.ashr(ShiftAmt: ShrAmt - ShlAmt); | 
|---|
| 1353 | } | 
|---|
| 1354 |  | 
|---|
| 1355 | // Check if condition-2 (see the comment to this function) is satified. | 
|---|
| 1356 | if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { | 
|---|
| 1357 | if (ShrAmt == ShlAmt) | 
|---|
| 1358 | return VarX; | 
|---|
| 1359 |  | 
|---|
| 1360 | if (!Shr->hasOneUse()) | 
|---|
| 1361 | return nullptr; | 
|---|
| 1362 |  | 
|---|
| 1363 | BinaryOperator *New; | 
|---|
| 1364 | if (ShrAmt < ShlAmt) { | 
|---|
| 1365 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShlAmt - ShrAmt); | 
|---|
| 1366 | New = BinaryOperator::CreateShl(V1: VarX, V2: Amt); | 
|---|
| 1367 | BinaryOperator *Orig = cast<BinaryOperator>(Val: Shl); | 
|---|
| 1368 | New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); | 
|---|
| 1369 | New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); | 
|---|
| 1370 | } else { | 
|---|
| 1371 | Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShrAmt - ShlAmt); | 
|---|
| 1372 | New = isLshr ? BinaryOperator::CreateLShr(V1: VarX, V2: Amt) : | 
|---|
| 1373 | BinaryOperator::CreateAShr(V1: VarX, V2: Amt); | 
|---|
| 1374 | if (cast<BinaryOperator>(Val: Shr)->isExact()) | 
|---|
| 1375 | New->setIsExact(true); | 
|---|
| 1376 | } | 
|---|
| 1377 |  | 
|---|
| 1378 | return InsertNewInstWith(New, Old: Shl->getIterator()); | 
|---|
| 1379 | } | 
|---|
| 1380 |  | 
|---|
| 1381 | return nullptr; | 
|---|
| 1382 | } | 
|---|
| 1383 |  | 
|---|
| 1384 | /// The specified value produces a vector with any number of elements. | 
|---|
| 1385 | /// This method analyzes which elements of the operand are poison and | 
|---|
| 1386 | /// returns that information in PoisonElts. | 
|---|
| 1387 | /// | 
|---|
| 1388 | /// DemandedElts contains the set of elements that are actually used by the | 
|---|
| 1389 | /// caller, and by default (AllowMultipleUsers equals false) the value is | 
|---|
| 1390 | /// simplified only if it has a single caller. If AllowMultipleUsers is set | 
|---|
| 1391 | /// to true, DemandedElts refers to the union of sets of elements that are | 
|---|
| 1392 | /// used by all callers. | 
|---|
| 1393 | /// | 
|---|
| 1394 | /// If the information about demanded elements can be used to simplify the | 
|---|
| 1395 | /// operation, the operation is simplified, then the resultant value is | 
|---|
| 1396 | /// returned.  This returns null if no change was made. | 
|---|
| 1397 | Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V, | 
|---|
| 1398 | APInt DemandedElts, | 
|---|
| 1399 | APInt &PoisonElts, | 
|---|
| 1400 | unsigned Depth, | 
|---|
| 1401 | bool AllowMultipleUsers) { | 
|---|
| 1402 | // Cannot analyze scalable type. The number of vector elements is not a | 
|---|
| 1403 | // compile-time constant. | 
|---|
| 1404 | if (isa<ScalableVectorType>(Val: V->getType())) | 
|---|
| 1405 | return nullptr; | 
|---|
| 1406 |  | 
|---|
| 1407 | unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements(); | 
|---|
| 1408 | APInt EltMask(APInt::getAllOnes(numBits: VWidth)); | 
|---|
| 1409 | assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); | 
|---|
| 1410 |  | 
|---|
| 1411 | if (match(V, P: m_Poison())) { | 
|---|
| 1412 | // If the entire vector is poison, just return this info. | 
|---|
| 1413 | PoisonElts = EltMask; | 
|---|
| 1414 | return nullptr; | 
|---|
| 1415 | } | 
|---|
| 1416 |  | 
|---|
| 1417 | if (DemandedElts.isZero()) { // If nothing is demanded, provide poison. | 
|---|
| 1418 | PoisonElts = EltMask; | 
|---|
| 1419 | return PoisonValue::get(T: V->getType()); | 
|---|
| 1420 | } | 
|---|
| 1421 |  | 
|---|
| 1422 | PoisonElts = 0; | 
|---|
| 1423 |  | 
|---|
| 1424 | if (auto *C = dyn_cast<Constant>(Val: V)) { | 
|---|
| 1425 | // Check if this is identity. If so, return 0 since we are not simplifying | 
|---|
| 1426 | // anything. | 
|---|
| 1427 | if (DemandedElts.isAllOnes()) | 
|---|
| 1428 | return nullptr; | 
|---|
| 1429 |  | 
|---|
| 1430 | Type *EltTy = cast<VectorType>(Val: V->getType())->getElementType(); | 
|---|
| 1431 | Constant *Poison = PoisonValue::get(T: EltTy); | 
|---|
| 1432 | SmallVector<Constant*, 16> Elts; | 
|---|
| 1433 | for (unsigned i = 0; i != VWidth; ++i) { | 
|---|
| 1434 | if (!DemandedElts[i]) {   // If not demanded, set to poison. | 
|---|
| 1435 | Elts.push_back(Elt: Poison); | 
|---|
| 1436 | PoisonElts.setBit(i); | 
|---|
| 1437 | continue; | 
|---|
| 1438 | } | 
|---|
| 1439 |  | 
|---|
| 1440 | Constant *Elt = C->getAggregateElement(Elt: i); | 
|---|
| 1441 | if (!Elt) return nullptr; | 
|---|
| 1442 |  | 
|---|
| 1443 | Elts.push_back(Elt); | 
|---|
| 1444 | if (isa<PoisonValue>(Val: Elt)) // Already poison. | 
|---|
| 1445 | PoisonElts.setBit(i); | 
|---|
| 1446 | } | 
|---|
| 1447 |  | 
|---|
| 1448 | // If we changed the constant, return it. | 
|---|
| 1449 | Constant *NewCV = ConstantVector::get(V: Elts); | 
|---|
| 1450 | return NewCV != C ? NewCV : nullptr; | 
|---|
| 1451 | } | 
|---|
| 1452 |  | 
|---|
| 1453 | // Limit search depth. | 
|---|
| 1454 | if (Depth == SimplifyDemandedVectorEltsDepthLimit) | 
|---|
| 1455 | return nullptr; | 
|---|
| 1456 |  | 
|---|
| 1457 | if (!AllowMultipleUsers) { | 
|---|
| 1458 | // If multiple users are using the root value, proceed with | 
|---|
| 1459 | // simplification conservatively assuming that all elements | 
|---|
| 1460 | // are needed. | 
|---|
| 1461 | if (!V->hasOneUse()) { | 
|---|
| 1462 | // Quit if we find multiple users of a non-root value though. | 
|---|
| 1463 | // They'll be handled when it's their turn to be visited by | 
|---|
| 1464 | // the main instcombine process. | 
|---|
| 1465 | if (Depth != 0) | 
|---|
| 1466 | // TODO: Just compute the PoisonElts information recursively. | 
|---|
| 1467 | return nullptr; | 
|---|
| 1468 |  | 
|---|
| 1469 | // Conservatively assume that all elements are needed. | 
|---|
| 1470 | DemandedElts = EltMask; | 
|---|
| 1471 | } | 
|---|
| 1472 | } | 
|---|
| 1473 |  | 
|---|
| 1474 | Instruction *I = dyn_cast<Instruction>(Val: V); | 
|---|
| 1475 | if (!I) return nullptr;        // Only analyze instructions. | 
|---|
| 1476 |  | 
|---|
| 1477 | bool MadeChange = false; | 
|---|
| 1478 | auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum, | 
|---|
| 1479 | APInt Demanded, APInt &Undef) { | 
|---|
| 1480 | auto *II = dyn_cast<IntrinsicInst>(Val: Inst); | 
|---|
| 1481 | Value *Op = II ? II->getArgOperand(i: OpNum) : Inst->getOperand(i: OpNum); | 
|---|
| 1482 | if (Value *V = SimplifyDemandedVectorElts(V: Op, DemandedElts: Demanded, PoisonElts&: Undef, Depth: Depth + 1)) { | 
|---|
| 1483 | replaceOperand(I&: *Inst, OpNum, V); | 
|---|
| 1484 | MadeChange = true; | 
|---|
| 1485 | } | 
|---|
| 1486 | }; | 
|---|
| 1487 |  | 
|---|
| 1488 | APInt PoisonElts2(VWidth, 0); | 
|---|
| 1489 | APInt PoisonElts3(VWidth, 0); | 
|---|
| 1490 | switch (I->getOpcode()) { | 
|---|
| 1491 | default: break; | 
|---|
| 1492 |  | 
|---|
| 1493 | case Instruction::GetElementPtr: { | 
|---|
| 1494 | // The LangRef requires that struct geps have all constant indices.  As | 
|---|
| 1495 | // such, we can't convert any operand to partial undef. | 
|---|
| 1496 | auto mayIndexStructType = [](GetElementPtrInst &GEP) { | 
|---|
| 1497 | for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP); | 
|---|
| 1498 | I != E; I++) | 
|---|
| 1499 | if (I.isStruct()) | 
|---|
| 1500 | return true; | 
|---|
| 1501 | return false; | 
|---|
| 1502 | }; | 
|---|
| 1503 | if (mayIndexStructType(cast<GetElementPtrInst>(Val&: *I))) | 
|---|
| 1504 | break; | 
|---|
| 1505 |  | 
|---|
| 1506 | // Conservatively track the demanded elements back through any vector | 
|---|
| 1507 | // operands we may have.  We know there must be at least one, or we | 
|---|
| 1508 | // wouldn't have a vector result to get here. Note that we intentionally | 
|---|
| 1509 | // merge the undef bits here since gepping with either an poison base or | 
|---|
| 1510 | // index results in poison. | 
|---|
| 1511 | for (unsigned i = 0; i < I->getNumOperands(); i++) { | 
|---|
| 1512 | if (i == 0 ? match(V: I->getOperand(i), P: m_Undef()) | 
|---|
| 1513 | : match(V: I->getOperand(i), P: m_Poison())) { | 
|---|
| 1514 | // If the entire vector is undefined, just return this info. | 
|---|
| 1515 | PoisonElts = EltMask; | 
|---|
| 1516 | return nullptr; | 
|---|
| 1517 | } | 
|---|
| 1518 | if (I->getOperand(i)->getType()->isVectorTy()) { | 
|---|
| 1519 | APInt PoisonEltsOp(VWidth, 0); | 
|---|
| 1520 | simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp); | 
|---|
| 1521 | // gep(x, undef) is not undef, so skip considering idx ops here | 
|---|
| 1522 | // Note that we could propagate poison, but we can't distinguish between | 
|---|
| 1523 | // undef & poison bits ATM | 
|---|
| 1524 | if (i == 0) | 
|---|
| 1525 | PoisonElts |= PoisonEltsOp; | 
|---|
| 1526 | } | 
|---|
| 1527 | } | 
|---|
| 1528 |  | 
|---|
| 1529 | break; | 
|---|
| 1530 | } | 
|---|
| 1531 | case Instruction::InsertElement: { | 
|---|
| 1532 | // If this is a variable index, we don't know which element it overwrites. | 
|---|
| 1533 | // demand exactly the same input as we produce. | 
|---|
| 1534 | ConstantInt *Idx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2)); | 
|---|
| 1535 | if (!Idx) { | 
|---|
| 1536 | // Note that we can't propagate undef elt info, because we don't know | 
|---|
| 1537 | // which elt is getting updated. | 
|---|
| 1538 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2); | 
|---|
| 1539 | break; | 
|---|
| 1540 | } | 
|---|
| 1541 |  | 
|---|
| 1542 | // The element inserted overwrites whatever was there, so the input demanded | 
|---|
| 1543 | // set is simpler than the output set. | 
|---|
| 1544 | unsigned IdxNo = Idx->getZExtValue(); | 
|---|
| 1545 | APInt PreInsertDemandedElts = DemandedElts; | 
|---|
| 1546 | if (IdxNo < VWidth) | 
|---|
| 1547 | PreInsertDemandedElts.clearBit(BitPosition: IdxNo); | 
|---|
| 1548 |  | 
|---|
| 1549 | // If we only demand the element that is being inserted and that element | 
|---|
| 1550 | // was extracted from the same index in another vector with the same type, | 
|---|
| 1551 | // replace this insert with that other vector. | 
|---|
| 1552 | // Note: This is attempted before the call to simplifyAndSetOp because that | 
|---|
| 1553 | //       may change PoisonElts to a value that does not match with Vec. | 
|---|
| 1554 | Value *Vec; | 
|---|
| 1555 | if (PreInsertDemandedElts == 0 && | 
|---|
| 1556 | match(V: I->getOperand(i: 1), | 
|---|
| 1557 | P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_SpecificInt(V: IdxNo))) && | 
|---|
| 1558 | Vec->getType() == I->getType()) { | 
|---|
| 1559 | return Vec; | 
|---|
| 1560 | } | 
|---|
| 1561 |  | 
|---|
| 1562 | simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts); | 
|---|
| 1563 |  | 
|---|
| 1564 | // If this is inserting an element that isn't demanded, remove this | 
|---|
| 1565 | // insertelement. | 
|---|
| 1566 | if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { | 
|---|
| 1567 | Worklist.push(I); | 
|---|
| 1568 | return I->getOperand(i: 0); | 
|---|
| 1569 | } | 
|---|
| 1570 |  | 
|---|
| 1571 | // The inserted element is defined. | 
|---|
| 1572 | PoisonElts.clearBit(BitPosition: IdxNo); | 
|---|
| 1573 | break; | 
|---|
| 1574 | } | 
|---|
| 1575 | case Instruction::ShuffleVector: { | 
|---|
| 1576 | auto *Shuffle = cast<ShuffleVectorInst>(Val: I); | 
|---|
| 1577 | assert(Shuffle->getOperand(0)->getType() == | 
|---|
| 1578 | Shuffle->getOperand(1)->getType() && | 
|---|
| 1579 | "Expected shuffle operands to have same type"); | 
|---|
| 1580 | unsigned OpWidth = cast<FixedVectorType>(Val: Shuffle->getOperand(i_nocapture: 0)->getType()) | 
|---|
| 1581 | ->getNumElements(); | 
|---|
| 1582 | // Handle trivial case of a splat. Only check the first element of LHS | 
|---|
| 1583 | // operand. | 
|---|
| 1584 | if (all_of(Range: Shuffle->getShuffleMask(), P: [](int Elt) { return Elt == 0; }) && | 
|---|
| 1585 | DemandedElts.isAllOnes()) { | 
|---|
| 1586 | if (!isa<PoisonValue>(Val: I->getOperand(i: 1))) { | 
|---|
| 1587 | I->setOperand(i: 1, Val: PoisonValue::get(T: I->getOperand(i: 1)->getType())); | 
|---|
| 1588 | MadeChange = true; | 
|---|
| 1589 | } | 
|---|
| 1590 | APInt LeftDemanded(OpWidth, 1); | 
|---|
| 1591 | APInt LHSPoisonElts(OpWidth, 0); | 
|---|
| 1592 | simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); | 
|---|
| 1593 | if (LHSPoisonElts[0]) | 
|---|
| 1594 | PoisonElts = EltMask; | 
|---|
| 1595 | else | 
|---|
| 1596 | PoisonElts.clearAllBits(); | 
|---|
| 1597 | break; | 
|---|
| 1598 | } | 
|---|
| 1599 |  | 
|---|
| 1600 | APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0); | 
|---|
| 1601 | for (unsigned i = 0; i < VWidth; i++) { | 
|---|
| 1602 | if (DemandedElts[i]) { | 
|---|
| 1603 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); | 
|---|
| 1604 | if (MaskVal != -1u) { | 
|---|
| 1605 | assert(MaskVal < OpWidth * 2 && | 
|---|
| 1606 | "shufflevector mask index out of range!"); | 
|---|
| 1607 | if (MaskVal < OpWidth) | 
|---|
| 1608 | LeftDemanded.setBit(MaskVal); | 
|---|
| 1609 | else | 
|---|
| 1610 | RightDemanded.setBit(MaskVal - OpWidth); | 
|---|
| 1611 | } | 
|---|
| 1612 | } | 
|---|
| 1613 | } | 
|---|
| 1614 |  | 
|---|
| 1615 | APInt LHSPoisonElts(OpWidth, 0); | 
|---|
| 1616 | simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts); | 
|---|
| 1617 |  | 
|---|
| 1618 | APInt RHSPoisonElts(OpWidth, 0); | 
|---|
| 1619 | simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts); | 
|---|
| 1620 |  | 
|---|
| 1621 | // If this shuffle does not change the vector length and the elements | 
|---|
| 1622 | // demanded by this shuffle are an identity mask, then this shuffle is | 
|---|
| 1623 | // unnecessary. | 
|---|
| 1624 | // | 
|---|
| 1625 | // We are assuming canonical form for the mask, so the source vector is | 
|---|
| 1626 | // operand 0 and operand 1 is not used. | 
|---|
| 1627 | // | 
|---|
| 1628 | // Note that if an element is demanded and this shuffle mask is undefined | 
|---|
| 1629 | // for that element, then the shuffle is not considered an identity | 
|---|
| 1630 | // operation. The shuffle prevents poison from the operand vector from | 
|---|
| 1631 | // leaking to the result by replacing poison with an undefined value. | 
|---|
| 1632 | if (VWidth == OpWidth) { | 
|---|
| 1633 | bool IsIdentityShuffle = true; | 
|---|
| 1634 | for (unsigned i = 0; i < VWidth; i++) { | 
|---|
| 1635 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); | 
|---|
| 1636 | if (DemandedElts[i] && i != MaskVal) { | 
|---|
| 1637 | IsIdentityShuffle = false; | 
|---|
| 1638 | break; | 
|---|
| 1639 | } | 
|---|
| 1640 | } | 
|---|
| 1641 | if (IsIdentityShuffle) | 
|---|
| 1642 | return Shuffle->getOperand(i_nocapture: 0); | 
|---|
| 1643 | } | 
|---|
| 1644 |  | 
|---|
| 1645 | bool NewPoisonElts = false; | 
|---|
| 1646 | unsigned LHSIdx = -1u, LHSValIdx = -1u; | 
|---|
| 1647 | unsigned RHSIdx = -1u, RHSValIdx = -1u; | 
|---|
| 1648 | bool LHSUniform = true; | 
|---|
| 1649 | bool RHSUniform = true; | 
|---|
| 1650 | for (unsigned i = 0; i < VWidth; i++) { | 
|---|
| 1651 | unsigned MaskVal = Shuffle->getMaskValue(Elt: i); | 
|---|
| 1652 | if (MaskVal == -1u) { | 
|---|
| 1653 | PoisonElts.setBit(i); | 
|---|
| 1654 | } else if (!DemandedElts[i]) { | 
|---|
| 1655 | NewPoisonElts = true; | 
|---|
| 1656 | PoisonElts.setBit(i); | 
|---|
| 1657 | } else if (MaskVal < OpWidth) { | 
|---|
| 1658 | if (LHSPoisonElts[MaskVal]) { | 
|---|
| 1659 | NewPoisonElts = true; | 
|---|
| 1660 | PoisonElts.setBit(i); | 
|---|
| 1661 | } else { | 
|---|
| 1662 | LHSIdx = LHSIdx == -1u ? i : OpWidth; | 
|---|
| 1663 | LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth; | 
|---|
| 1664 | LHSUniform = LHSUniform && (MaskVal == i); | 
|---|
| 1665 | } | 
|---|
| 1666 | } else { | 
|---|
| 1667 | if (RHSPoisonElts[MaskVal - OpWidth]) { | 
|---|
| 1668 | NewPoisonElts = true; | 
|---|
| 1669 | PoisonElts.setBit(i); | 
|---|
| 1670 | } else { | 
|---|
| 1671 | RHSIdx = RHSIdx == -1u ? i : OpWidth; | 
|---|
| 1672 | RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth; | 
|---|
| 1673 | RHSUniform = RHSUniform && (MaskVal - OpWidth == i); | 
|---|
| 1674 | } | 
|---|
| 1675 | } | 
|---|
| 1676 | } | 
|---|
| 1677 |  | 
|---|
| 1678 | // Try to transform shuffle with constant vector and single element from | 
|---|
| 1679 | // this constant vector to single insertelement instruction. | 
|---|
| 1680 | // shufflevector V, C, <v1, v2, .., ci, .., vm> -> | 
|---|
| 1681 | // insertelement V, C[ci], ci-n | 
|---|
| 1682 | if (OpWidth == | 
|---|
| 1683 | cast<FixedVectorType>(Val: Shuffle->getType())->getNumElements()) { | 
|---|
| 1684 | Value *Op = nullptr; | 
|---|
| 1685 | Constant *Value = nullptr; | 
|---|
| 1686 | unsigned Idx = -1u; | 
|---|
| 1687 |  | 
|---|
| 1688 | // Find constant vector with the single element in shuffle (LHS or RHS). | 
|---|
| 1689 | if (LHSIdx < OpWidth && RHSUniform) { | 
|---|
| 1690 | if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 0))) { | 
|---|
| 1691 | Op = Shuffle->getOperand(i_nocapture: 1); | 
|---|
| 1692 | Value = CV->getOperand(i_nocapture: LHSValIdx); | 
|---|
| 1693 | Idx = LHSIdx; | 
|---|
| 1694 | } | 
|---|
| 1695 | } | 
|---|
| 1696 | if (RHSIdx < OpWidth && LHSUniform) { | 
|---|
| 1697 | if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 1))) { | 
|---|
| 1698 | Op = Shuffle->getOperand(i_nocapture: 0); | 
|---|
| 1699 | Value = CV->getOperand(i_nocapture: RHSValIdx); | 
|---|
| 1700 | Idx = RHSIdx; | 
|---|
| 1701 | } | 
|---|
| 1702 | } | 
|---|
| 1703 | // Found constant vector with single element - convert to insertelement. | 
|---|
| 1704 | if (Op && Value) { | 
|---|
| 1705 | Instruction *New = InsertElementInst::Create( | 
|---|
| 1706 | Vec: Op, NewElt: Value, Idx: ConstantInt::get(Ty: Type::getInt64Ty(C&: I->getContext()), V: Idx), | 
|---|
| 1707 | NameStr: Shuffle->getName()); | 
|---|
| 1708 | InsertNewInstWith(New, Old: Shuffle->getIterator()); | 
|---|
| 1709 | return New; | 
|---|
| 1710 | } | 
|---|
| 1711 | } | 
|---|
| 1712 | if (NewPoisonElts) { | 
|---|
| 1713 | // Add additional discovered undefs. | 
|---|
| 1714 | SmallVector<int, 16> Elts; | 
|---|
| 1715 | for (unsigned i = 0; i < VWidth; ++i) { | 
|---|
| 1716 | if (PoisonElts[i]) | 
|---|
| 1717 | Elts.push_back(Elt: PoisonMaskElem); | 
|---|
| 1718 | else | 
|---|
| 1719 | Elts.push_back(Elt: Shuffle->getMaskValue(Elt: i)); | 
|---|
| 1720 | } | 
|---|
| 1721 | Shuffle->setShuffleMask(Elts); | 
|---|
| 1722 | MadeChange = true; | 
|---|
| 1723 | } | 
|---|
| 1724 | break; | 
|---|
| 1725 | } | 
|---|
| 1726 | case Instruction::Select: { | 
|---|
| 1727 | // If this is a vector select, try to transform the select condition based | 
|---|
| 1728 | // on the current demanded elements. | 
|---|
| 1729 | SelectInst *Sel = cast<SelectInst>(Val: I); | 
|---|
| 1730 | if (Sel->getCondition()->getType()->isVectorTy()) { | 
|---|
| 1731 | // TODO: We are not doing anything with PoisonElts based on this call. | 
|---|
| 1732 | // It is overwritten below based on the other select operands. If an | 
|---|
| 1733 | // element of the select condition is known undef, then we are free to | 
|---|
| 1734 | // choose the output value from either arm of the select. If we know that | 
|---|
| 1735 | // one of those values is undef, then the output can be undef. | 
|---|
| 1736 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); | 
|---|
| 1737 | } | 
|---|
| 1738 |  | 
|---|
| 1739 | // Next, see if we can transform the arms of the select. | 
|---|
| 1740 | APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts); | 
|---|
| 1741 | if (auto *CV = dyn_cast<ConstantVector>(Val: Sel->getCondition())) { | 
|---|
| 1742 | for (unsigned i = 0; i < VWidth; i++) { | 
|---|
| 1743 | Constant *CElt = CV->getAggregateElement(Elt: i); | 
|---|
| 1744 |  | 
|---|
| 1745 | // isNullValue() always returns false when called on a ConstantExpr. | 
|---|
| 1746 | if (CElt->isNullValue()) | 
|---|
| 1747 | DemandedLHS.clearBit(BitPosition: i); | 
|---|
| 1748 | else if (CElt->isOneValue()) | 
|---|
| 1749 | DemandedRHS.clearBit(BitPosition: i); | 
|---|
| 1750 | } | 
|---|
| 1751 | } | 
|---|
| 1752 |  | 
|---|
| 1753 | simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2); | 
|---|
| 1754 | simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3); | 
|---|
| 1755 |  | 
|---|
| 1756 | // Output elements are undefined if the element from each arm is undefined. | 
|---|
| 1757 | // TODO: This can be improved. See comment in select condition handling. | 
|---|
| 1758 | PoisonElts = PoisonElts2 & PoisonElts3; | 
|---|
| 1759 | break; | 
|---|
| 1760 | } | 
|---|
| 1761 | case Instruction::BitCast: { | 
|---|
| 1762 | // Vector->vector casts only. | 
|---|
| 1763 | VectorType *VTy = dyn_cast<VectorType>(Val: I->getOperand(i: 0)->getType()); | 
|---|
| 1764 | if (!VTy) break; | 
|---|
| 1765 | unsigned InVWidth = cast<FixedVectorType>(Val: VTy)->getNumElements(); | 
|---|
| 1766 | APInt InputDemandedElts(InVWidth, 0); | 
|---|
| 1767 | PoisonElts2 = APInt(InVWidth, 0); | 
|---|
| 1768 | unsigned Ratio; | 
|---|
| 1769 |  | 
|---|
| 1770 | if (VWidth == InVWidth) { | 
|---|
| 1771 | // If we are converting from <4 x i32> -> <4 x f32>, we demand the same | 
|---|
| 1772 | // elements as are demanded of us. | 
|---|
| 1773 | Ratio = 1; | 
|---|
| 1774 | InputDemandedElts = DemandedElts; | 
|---|
| 1775 | } else if ((VWidth % InVWidth) == 0) { | 
|---|
| 1776 | // If the number of elements in the output is a multiple of the number of | 
|---|
| 1777 | // elements in the input then an input element is live if any of the | 
|---|
| 1778 | // corresponding output elements are live. | 
|---|
| 1779 | Ratio = VWidth / InVWidth; | 
|---|
| 1780 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) | 
|---|
| 1781 | if (DemandedElts[OutIdx]) | 
|---|
| 1782 | InputDemandedElts.setBit(OutIdx / Ratio); | 
|---|
| 1783 | } else if ((InVWidth % VWidth) == 0) { | 
|---|
| 1784 | // If the number of elements in the input is a multiple of the number of | 
|---|
| 1785 | // elements in the output then an input element is live if the | 
|---|
| 1786 | // corresponding output element is live. | 
|---|
| 1787 | Ratio = InVWidth / VWidth; | 
|---|
| 1788 | for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) | 
|---|
| 1789 | if (DemandedElts[InIdx / Ratio]) | 
|---|
| 1790 | InputDemandedElts.setBit(InIdx); | 
|---|
| 1791 | } else { | 
|---|
| 1792 | // Unsupported so far. | 
|---|
| 1793 | break; | 
|---|
| 1794 | } | 
|---|
| 1795 |  | 
|---|
| 1796 | simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2); | 
|---|
| 1797 |  | 
|---|
| 1798 | if (VWidth == InVWidth) { | 
|---|
| 1799 | PoisonElts = PoisonElts2; | 
|---|
| 1800 | } else if ((VWidth % InVWidth) == 0) { | 
|---|
| 1801 | // If the number of elements in the output is a multiple of the number of | 
|---|
| 1802 | // elements in the input then an output element is undef if the | 
|---|
| 1803 | // corresponding input element is undef. | 
|---|
| 1804 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) | 
|---|
| 1805 | if (PoisonElts2[OutIdx / Ratio]) | 
|---|
| 1806 | PoisonElts.setBit(OutIdx); | 
|---|
| 1807 | } else if ((InVWidth % VWidth) == 0) { | 
|---|
| 1808 | // If the number of elements in the input is a multiple of the number of | 
|---|
| 1809 | // elements in the output then an output element is undef if all of the | 
|---|
| 1810 | // corresponding input elements are undef. | 
|---|
| 1811 | for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { | 
|---|
| 1812 | APInt SubUndef = PoisonElts2.lshr(shiftAmt: OutIdx * Ratio).zextOrTrunc(width: Ratio); | 
|---|
| 1813 | if (SubUndef.popcount() == Ratio) | 
|---|
| 1814 | PoisonElts.setBit(OutIdx); | 
|---|
| 1815 | } | 
|---|
| 1816 | } else { | 
|---|
| 1817 | llvm_unreachable( "Unimp"); | 
|---|
| 1818 | } | 
|---|
| 1819 | break; | 
|---|
| 1820 | } | 
|---|
| 1821 | case Instruction::FPTrunc: | 
|---|
| 1822 | case Instruction::FPExt: | 
|---|
| 1823 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); | 
|---|
| 1824 | break; | 
|---|
| 1825 |  | 
|---|
| 1826 | case Instruction::Call: { | 
|---|
| 1827 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I); | 
|---|
| 1828 | if (!II) break; | 
|---|
| 1829 | switch (II->getIntrinsicID()) { | 
|---|
| 1830 | case Intrinsic::masked_gather: // fallthrough | 
|---|
| 1831 | case Intrinsic::masked_load: { | 
|---|
| 1832 | // Subtlety: If we load from a pointer, the pointer must be valid | 
|---|
| 1833 | // regardless of whether the element is demanded.  Doing otherwise risks | 
|---|
| 1834 | // segfaults which didn't exist in the original program. | 
|---|
| 1835 | APInt DemandedPtrs(APInt::getAllOnes(numBits: VWidth)), | 
|---|
| 1836 | DemandedPassThrough(DemandedElts); | 
|---|
| 1837 | if (auto *CV = dyn_cast<ConstantVector>(Val: II->getOperand(i_nocapture: 2))) | 
|---|
| 1838 | for (unsigned i = 0; i < VWidth; i++) { | 
|---|
| 1839 | Constant *CElt = CV->getAggregateElement(Elt: i); | 
|---|
| 1840 | if (CElt->isNullValue()) | 
|---|
| 1841 | DemandedPtrs.clearBit(BitPosition: i); | 
|---|
| 1842 | else if (CElt->isAllOnesValue()) | 
|---|
| 1843 | DemandedPassThrough.clearBit(BitPosition: i); | 
|---|
| 1844 | } | 
|---|
| 1845 | if (II->getIntrinsicID() == Intrinsic::masked_gather) | 
|---|
| 1846 | simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2); | 
|---|
| 1847 | simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3); | 
|---|
| 1848 |  | 
|---|
| 1849 | // Output elements are undefined if the element from both sources are. | 
|---|
| 1850 | // TODO: can strengthen via mask as well. | 
|---|
| 1851 | PoisonElts = PoisonElts2 & PoisonElts3; | 
|---|
| 1852 | break; | 
|---|
| 1853 | } | 
|---|
| 1854 | default: { | 
|---|
| 1855 | // Handle target specific intrinsics | 
|---|
| 1856 | std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic( | 
|---|
| 1857 | II&: *II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, | 
|---|
| 1858 | SimplifyAndSetOp: simplifyAndSetOp); | 
|---|
| 1859 | if (V) | 
|---|
| 1860 | return *V; | 
|---|
| 1861 | break; | 
|---|
| 1862 | } | 
|---|
| 1863 | } // switch on IntrinsicID | 
|---|
| 1864 | break; | 
|---|
| 1865 | } // case Call | 
|---|
| 1866 | } // switch on Opcode | 
|---|
| 1867 |  | 
|---|
| 1868 | // TODO: We bail completely on integer div/rem and shifts because they have | 
|---|
| 1869 | // UB/poison potential, but that should be refined. | 
|---|
| 1870 | BinaryOperator *BO; | 
|---|
| 1871 | if (match(V: I, P: m_BinOp(I&: BO)) && !BO->isIntDivRem() && !BO->isShift()) { | 
|---|
| 1872 | Value *X = BO->getOperand(i_nocapture: 0); | 
|---|
| 1873 | Value *Y = BO->getOperand(i_nocapture: 1); | 
|---|
| 1874 |  | 
|---|
| 1875 | // Look for an equivalent binop except that one operand has been shuffled. | 
|---|
| 1876 | // If the demand for this binop only includes elements that are the same as | 
|---|
| 1877 | // the other binop, then we may be able to replace this binop with a use of | 
|---|
| 1878 | // the earlier one. | 
|---|
| 1879 | // | 
|---|
| 1880 | // Example: | 
|---|
| 1881 | // %other_bo = bo (shuf X, {0}), Y | 
|---|
| 1882 | // %this_extracted_bo = extelt (bo X, Y), 0 | 
|---|
| 1883 | // --> | 
|---|
| 1884 | // %other_bo = bo (shuf X, {0}), Y | 
|---|
| 1885 | // %this_extracted_bo = extelt %other_bo, 0 | 
|---|
| 1886 | // | 
|---|
| 1887 | // TODO: Handle demand of an arbitrary single element or more than one | 
|---|
| 1888 | //       element instead of just element 0. | 
|---|
| 1889 | // TODO: Unlike general demanded elements transforms, this should be safe | 
|---|
| 1890 | //       for any (div/rem/shift) opcode too. | 
|---|
| 1891 | if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() && | 
|---|
| 1892 | BO->hasOneUse() ) { | 
|---|
| 1893 |  | 
|---|
| 1894 | auto findShufBO = [&](bool MatchShufAsOp0) -> User * { | 
|---|
| 1895 | // Try to use shuffle-of-operand in place of an operand: | 
|---|
| 1896 | // bo X, Y --> bo (shuf X), Y | 
|---|
| 1897 | // bo X, Y --> bo X, (shuf Y) | 
|---|
| 1898 |  | 
|---|
| 1899 | Value *OtherOp = MatchShufAsOp0 ? Y : X; | 
|---|
| 1900 | if (!OtherOp->hasUseList()) | 
|---|
| 1901 | return nullptr; | 
|---|
| 1902 |  | 
|---|
| 1903 | BinaryOperator::BinaryOps Opcode = BO->getOpcode(); | 
|---|
| 1904 | Value *ShufOp = MatchShufAsOp0 ? X : Y; | 
|---|
| 1905 |  | 
|---|
| 1906 | for (User *U : OtherOp->users()) { | 
|---|
| 1907 | ArrayRef<int> Mask; | 
|---|
| 1908 | auto Shuf = m_Shuffle(v1: m_Specific(V: ShufOp), v2: m_Value(), mask: m_Mask(Mask)); | 
|---|
| 1909 | if (BO->isCommutative() | 
|---|
| 1910 | ? match(V: U, P: m_c_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp))) | 
|---|
| 1911 | : MatchShufAsOp0 | 
|---|
| 1912 | ? match(V: U, P: m_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp))) | 
|---|
| 1913 | : match(V: U, P: m_BinOp(Opcode, L: m_Specific(V: OtherOp), R: Shuf))) | 
|---|
| 1914 | if (match(Mask, P: m_ZeroMask()) && Mask[0] != PoisonMaskElem) | 
|---|
| 1915 | if (DT.dominates(Def: U, User: I)) | 
|---|
| 1916 | return U; | 
|---|
| 1917 | } | 
|---|
| 1918 | return nullptr; | 
|---|
| 1919 | }; | 
|---|
| 1920 |  | 
|---|
| 1921 | if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true)) | 
|---|
| 1922 | return ShufBO; | 
|---|
| 1923 | if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false)) | 
|---|
| 1924 | return ShufBO; | 
|---|
| 1925 | } | 
|---|
| 1926 |  | 
|---|
| 1927 | simplifyAndSetOp(I, 0, DemandedElts, PoisonElts); | 
|---|
| 1928 | simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2); | 
|---|
| 1929 |  | 
|---|
| 1930 | // Output elements are undefined if both are undefined. Consider things | 
|---|
| 1931 | // like undef & 0. The result is known zero, not undef. | 
|---|
| 1932 | PoisonElts &= PoisonElts2; | 
|---|
| 1933 | } | 
|---|
| 1934 |  | 
|---|
| 1935 | // If we've proven all of the lanes poison, return a poison value. | 
|---|
| 1936 | // TODO: Intersect w/demanded lanes | 
|---|
| 1937 | if (PoisonElts.isAllOnes()) | 
|---|
| 1938 | return PoisonValue::get(T: I->getType()); | 
|---|
| 1939 |  | 
|---|
| 1940 | return MadeChange ? I : nullptr; | 
|---|
| 1941 | } | 
|---|
| 1942 |  | 
|---|
| 1943 | /// For floating-point classes that resolve to a single bit pattern, return that | 
|---|
| 1944 | /// value. | 
|---|
| 1945 | static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) { | 
|---|
| 1946 | if (Mask == fcNone) | 
|---|
| 1947 | return PoisonValue::get(T: Ty); | 
|---|
| 1948 |  | 
|---|
| 1949 | if (Mask == fcPosZero) | 
|---|
| 1950 | return Constant::getNullValue(Ty); | 
|---|
| 1951 |  | 
|---|
| 1952 | // TODO: Support aggregate types that are allowed by FPMathOperator. | 
|---|
| 1953 | if (Ty->isAggregateType()) | 
|---|
| 1954 | return nullptr; | 
|---|
| 1955 |  | 
|---|
| 1956 | switch (Mask) { | 
|---|
| 1957 | case fcNegZero: | 
|---|
| 1958 | return ConstantFP::getZero(Ty, Negative: true); | 
|---|
| 1959 | case fcPosInf: | 
|---|
| 1960 | return ConstantFP::getInfinity(Ty); | 
|---|
| 1961 | case fcNegInf: | 
|---|
| 1962 | return ConstantFP::getInfinity(Ty, Negative: true); | 
|---|
| 1963 | default: | 
|---|
| 1964 | return nullptr; | 
|---|
| 1965 | } | 
|---|
| 1966 | } | 
|---|
| 1967 |  | 
|---|
| 1968 | Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V, | 
|---|
| 1969 | FPClassTest DemandedMask, | 
|---|
| 1970 | KnownFPClass &Known, | 
|---|
| 1971 | Instruction *CxtI, | 
|---|
| 1972 | unsigned Depth) { | 
|---|
| 1973 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); | 
|---|
| 1974 | Type *VTy = V->getType(); | 
|---|
| 1975 |  | 
|---|
| 1976 | assert(Known == KnownFPClass() && "expected uninitialized state"); | 
|---|
| 1977 |  | 
|---|
| 1978 | if (DemandedMask == fcNone) | 
|---|
| 1979 | return isa<UndefValue>(Val: V) ? nullptr : PoisonValue::get(T: VTy); | 
|---|
| 1980 |  | 
|---|
| 1981 | if (Depth == MaxAnalysisRecursionDepth) | 
|---|
| 1982 | return nullptr; | 
|---|
| 1983 |  | 
|---|
| 1984 | Instruction *I = dyn_cast<Instruction>(Val: V); | 
|---|
| 1985 | if (!I) { | 
|---|
| 1986 | // Handle constants and arguments | 
|---|
| 1987 | Known = computeKnownFPClass(Val: V, Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); | 
|---|
| 1988 | Value *FoldedToConst = | 
|---|
| 1989 | getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses); | 
|---|
| 1990 | return FoldedToConst == V ? nullptr : FoldedToConst; | 
|---|
| 1991 | } | 
|---|
| 1992 |  | 
|---|
| 1993 | if (!I->hasOneUse()) | 
|---|
| 1994 | return nullptr; | 
|---|
| 1995 |  | 
|---|
| 1996 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: I)) { | 
|---|
| 1997 | if (FPOp->hasNoNaNs()) | 
|---|
| 1998 | DemandedMask &= ~fcNan; | 
|---|
| 1999 | if (FPOp->hasNoInfs()) | 
|---|
| 2000 | DemandedMask &= ~fcInf; | 
|---|
| 2001 | } | 
|---|
| 2002 | switch (I->getOpcode()) { | 
|---|
| 2003 | case Instruction::FNeg: { | 
|---|
| 2004 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::fneg(Mask: DemandedMask), Known, | 
|---|
| 2005 | Depth: Depth + 1)) | 
|---|
| 2006 | return I; | 
|---|
| 2007 | Known.fneg(); | 
|---|
| 2008 | break; | 
|---|
| 2009 | } | 
|---|
| 2010 | case Instruction::Call: { | 
|---|
| 2011 | CallInst *CI = cast<CallInst>(Val: I); | 
|---|
| 2012 | switch (CI->getIntrinsicID()) { | 
|---|
| 2013 | case Intrinsic::fabs: | 
|---|
| 2014 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::inverse_fabs(Mask: DemandedMask), Known, | 
|---|
| 2015 | Depth: Depth + 1)) | 
|---|
| 2016 | return I; | 
|---|
| 2017 | Known.fabs(); | 
|---|
| 2018 | break; | 
|---|
| 2019 | case Intrinsic::arithmetic_fence: | 
|---|
| 2020 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Depth: Depth + 1)) | 
|---|
| 2021 | return I; | 
|---|
| 2022 | break; | 
|---|
| 2023 | case Intrinsic::copysign: { | 
|---|
| 2024 | // Flip on more potentially demanded classes | 
|---|
| 2025 | const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(Mask: DemandedMask); | 
|---|
| 2026 | if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedMaskAnySign, Known, Depth: Depth + 1)) | 
|---|
| 2027 | return I; | 
|---|
| 2028 |  | 
|---|
| 2029 | if ((DemandedMask & fcNegative) == DemandedMask) { | 
|---|
| 2030 | // Roundabout way of replacing with fneg(fabs) | 
|---|
| 2031 | I->setOperand(i: 1, Val: ConstantFP::get(Ty: VTy, V: -1.0)); | 
|---|
| 2032 | return I; | 
|---|
| 2033 | } | 
|---|
| 2034 |  | 
|---|
| 2035 | if ((DemandedMask & fcPositive) == DemandedMask) { | 
|---|
| 2036 | // Roundabout way of replacing with fabs | 
|---|
| 2037 | I->setOperand(i: 1, Val: ConstantFP::getZero(Ty: VTy)); | 
|---|
| 2038 | return I; | 
|---|
| 2039 | } | 
|---|
| 2040 |  | 
|---|
| 2041 | KnownFPClass KnownSign = | 
|---|
| 2042 | computeKnownFPClass(Val: I->getOperand(i: 1), Interested: fcAllFlags, CtxI: CxtI, Depth: Depth + 1); | 
|---|
| 2043 | Known.copysign(Sign: KnownSign); | 
|---|
| 2044 | break; | 
|---|
| 2045 | } | 
|---|
| 2046 | default: | 
|---|
| 2047 | Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1); | 
|---|
| 2048 | break; | 
|---|
| 2049 | } | 
|---|
| 2050 |  | 
|---|
| 2051 | break; | 
|---|
| 2052 | } | 
|---|
| 2053 | case Instruction::Select: { | 
|---|
| 2054 | KnownFPClass KnownLHS, KnownRHS; | 
|---|
| 2055 | if (SimplifyDemandedFPClass(I, Op: 2, DemandedMask, Known&: KnownRHS, Depth: Depth + 1) || | 
|---|
| 2056 | SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownLHS, Depth: Depth + 1)) | 
|---|
| 2057 | return I; | 
|---|
| 2058 |  | 
|---|
| 2059 | if (KnownLHS.isKnownNever(Mask: DemandedMask)) | 
|---|
| 2060 | return I->getOperand(i: 2); | 
|---|
| 2061 | if (KnownRHS.isKnownNever(Mask: DemandedMask)) | 
|---|
| 2062 | return I->getOperand(i: 1); | 
|---|
| 2063 |  | 
|---|
| 2064 | // TODO: Recognize clamping patterns | 
|---|
| 2065 | Known = KnownLHS | KnownRHS; | 
|---|
| 2066 | break; | 
|---|
| 2067 | } | 
|---|
| 2068 | default: | 
|---|
| 2069 | Known = computeKnownFPClass(Val: I, Interested: ~DemandedMask, CtxI: CxtI, Depth: Depth + 1); | 
|---|
| 2070 | break; | 
|---|
| 2071 | } | 
|---|
| 2072 |  | 
|---|
| 2073 | return getFPClassConstant(Ty: VTy, Mask: DemandedMask & Known.KnownFPClasses); | 
|---|
| 2074 | } | 
|---|
| 2075 |  | 
|---|
| 2076 | bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo, | 
|---|
| 2077 | FPClassTest DemandedMask, | 
|---|
| 2078 | KnownFPClass &Known, | 
|---|
| 2079 | unsigned Depth) { | 
|---|
| 2080 | Use &U = I->getOperandUse(i: OpNo); | 
|---|
| 2081 | Value *NewVal = | 
|---|
| 2082 | SimplifyDemandedUseFPClass(V: U.get(), DemandedMask, Known, CxtI: I, Depth); | 
|---|
| 2083 | if (!NewVal) | 
|---|
| 2084 | return false; | 
|---|
| 2085 | if (Instruction *OpInst = dyn_cast<Instruction>(Val&: U)) | 
|---|
| 2086 | salvageDebugInfo(I&: *OpInst); | 
|---|
| 2087 |  | 
|---|
| 2088 | replaceUse(U, NewValue: NewVal); | 
|---|
| 2089 | return true; | 
|---|
| 2090 | } | 
|---|
| 2091 |  | 
|---|