| 1 | //===- InstructionCombining.cpp - Combine multiple instructions -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
// InstructionCombining - Combine instructions to form fewer, simpler
// instructions. This pass does not modify the CFG; it is where algebraic
// simplification happens.
| 12 | // |
| 13 | // This pass combines things like: |
| 14 | // %Y = add i32 %X, 1 |
| 15 | // %Z = add i32 %Y, 1 |
| 16 | // into: |
| 17 | // %Z = add i32 %X, 2 |
| 18 | // |
| 19 | // This is a simple worklist driven algorithm. |
| 20 | // |
| 21 | // This pass guarantees that the following canonicalizations are performed on |
| 22 | // the program: |
| 23 | // 1. If a binary operator has a constant operand, it is moved to the RHS |
| 24 | // 2. Bitwise operators with constant operands are always grouped so that |
| 25 | // shifts are performed first, then or's, then and's, then xor's. |
| 26 | // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible |
| 27 | // 4. All cmp instructions on boolean values are replaced with logical ops |
| 28 | // 5. add X, X is represented as (X*2) => (X << 1) |
| 29 | // 6. Multiplies with a power-of-two constant argument are transformed into |
| 30 | // shifts. |
| 31 | // ... etc. |
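//
//    For illustration only (hypothetical IR), canonicalizations 1 and 6 would
//    together rewrite:
//        %a = mul i32 8, %X
//    into:
//        %a = shl i32 %X, 3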
| 32 | // |
| 33 | //===----------------------------------------------------------------------===// |
| 34 | |
| 35 | #include "InstCombineInternal.h" |
| 36 | #include "llvm/ADT/APFloat.h" |
| 37 | #include "llvm/ADT/APInt.h" |
| 38 | #include "llvm/ADT/ArrayRef.h" |
| 39 | #include "llvm/ADT/DenseMap.h" |
| 40 | #include "llvm/ADT/SmallPtrSet.h" |
| 41 | #include "llvm/ADT/SmallVector.h" |
| 42 | #include "llvm/ADT/Statistic.h" |
| 43 | #include "llvm/Analysis/AliasAnalysis.h" |
| 44 | #include "llvm/Analysis/AssumptionCache.h" |
| 45 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
| 46 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 47 | #include "llvm/Analysis/CFG.h" |
| 48 | #include "llvm/Analysis/ConstantFolding.h" |
| 49 | #include "llvm/Analysis/GlobalsModRef.h" |
| 50 | #include "llvm/Analysis/InstructionSimplify.h" |
| 51 | #include "llvm/Analysis/LastRunTrackingAnalysis.h" |
| 52 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
| 53 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 54 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 55 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 56 | #include "llvm/Analysis/TargetFolder.h" |
| 57 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 58 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 59 | #include "llvm/Analysis/Utils/Local.h" |
| 60 | #include "llvm/Analysis/ValueTracking.h" |
| 61 | #include "llvm/Analysis/VectorUtils.h" |
| 62 | #include "llvm/IR/BasicBlock.h" |
| 63 | #include "llvm/IR/CFG.h" |
| 64 | #include "llvm/IR/Constant.h" |
| 65 | #include "llvm/IR/Constants.h" |
| 66 | #include "llvm/IR/DIBuilder.h" |
| 67 | #include "llvm/IR/DataLayout.h" |
| 68 | #include "llvm/IR/DebugInfo.h" |
| 69 | #include "llvm/IR/DerivedTypes.h" |
| 70 | #include "llvm/IR/Dominators.h" |
| 71 | #include "llvm/IR/EHPersonalities.h" |
| 72 | #include "llvm/IR/Function.h" |
| 73 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
| 74 | #include "llvm/IR/IRBuilder.h" |
| 75 | #include "llvm/IR/InstrTypes.h" |
| 76 | #include "llvm/IR/Instruction.h" |
| 77 | #include "llvm/IR/Instructions.h" |
| 78 | #include "llvm/IR/IntrinsicInst.h" |
| 79 | #include "llvm/IR/Intrinsics.h" |
| 80 | #include "llvm/IR/Metadata.h" |
| 81 | #include "llvm/IR/Operator.h" |
| 82 | #include "llvm/IR/PassManager.h" |
| 83 | #include "llvm/IR/PatternMatch.h" |
| 84 | #include "llvm/IR/Type.h" |
| 85 | #include "llvm/IR/Use.h" |
| 86 | #include "llvm/IR/User.h" |
| 87 | #include "llvm/IR/Value.h" |
| 88 | #include "llvm/IR/ValueHandle.h" |
| 89 | #include "llvm/InitializePasses.h" |
| 90 | #include "llvm/Support/Casting.h" |
| 91 | #include "llvm/Support/CommandLine.h" |
| 92 | #include "llvm/Support/Compiler.h" |
| 93 | #include "llvm/Support/Debug.h" |
| 94 | #include "llvm/Support/DebugCounter.h" |
| 95 | #include "llvm/Support/ErrorHandling.h" |
| 96 | #include "llvm/Support/KnownBits.h" |
| 97 | #include "llvm/Support/KnownFPClass.h" |
| 98 | #include "llvm/Support/raw_ostream.h" |
| 99 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
| 100 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 101 | #include "llvm/Transforms/Utils/Local.h" |
| 102 | #include <algorithm> |
| 103 | #include <cassert> |
| 104 | #include <cstdint> |
| 105 | #include <memory> |
| 106 | #include <optional> |
| 107 | #include <string> |
| 108 | #include <utility> |
| 109 | |
| 110 | #define DEBUG_TYPE "instcombine" |
| 111 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
| 113 | |
| 114 | using namespace llvm; |
| 115 | using namespace llvm::PatternMatch; |
| 116 | |
| 117 | STATISTIC(NumWorklistIterations, |
| 118 | "Number of instruction combining iterations performed" ); |
| 119 | STATISTIC(NumOneIteration, "Number of functions with one iteration" ); |
| 120 | STATISTIC(NumTwoIterations, "Number of functions with two iterations" ); |
| 121 | STATISTIC(NumThreeIterations, "Number of functions with three iterations" ); |
| 122 | STATISTIC(NumFourOrMoreIterations, |
| 123 | "Number of functions with four or more iterations" ); |
| 124 | |
| 125 | STATISTIC(NumCombined , "Number of insts combined" ); |
| 126 | STATISTIC(NumConstProp, "Number of constant folds" ); |
| 127 | STATISTIC(NumDeadInst , "Number of dead inst eliminated" ); |
| 128 | STATISTIC(NumSunkInst , "Number of instructions sunk" ); |
| 129 | STATISTIC(NumExpand, "Number of expansions" ); |
| 130 | STATISTIC(NumFactor , "Number of factorizations" ); |
| 131 | STATISTIC(NumReassoc , "Number of reassociations" ); |
| 132 | DEBUG_COUNTER(VisitCounter, "instcombine-visit" , |
| 133 | "Controls which instructions are visited" ); |
| 134 | |
| 135 | static cl::opt<bool> |
| 136 | EnableCodeSinking("instcombine-code-sinking" , cl::desc("Enable code sinking" ), |
| 137 | cl::init(Val: true)); |
| 138 | |
| 139 | static cl::opt<unsigned> MaxSinkNumUsers( |
| 140 | "instcombine-max-sink-users" , cl::init(Val: 32), |
| 141 | cl::desc("Maximum number of undroppable users for instruction sinking" )); |
| 142 | |
| 143 | static cl::opt<unsigned> |
| 144 | MaxArraySize("instcombine-maxarray-size" , cl::init(Val: 1024), |
| 145 | cl::desc("Maximum array size considered when doing a combine" )); |
| 146 | |
| 147 | // FIXME: Remove this flag when it is no longer necessary to convert |
| 148 | // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false |
| 149 | // increases variable availability at the cost of accuracy. Variables that |
| 150 | // cannot be promoted by mem2reg or SROA will be described as living in memory |
| 151 | // for their entire lifetime. However, passes like DSE and instcombine can |
| 152 | // delete stores to the alloca, leading to misleading and inaccurate debug |
| 153 | // information. This flag can be removed when those passes are fixed. |
| 154 | static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare" , |
| 155 | cl::Hidden, cl::init(Val: true)); |
| 156 | |
| 157 | std::optional<Instruction *> |
| 158 | InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { |
| 159 | // Handle target specific intrinsics |
| 160 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
| 161 | return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II); |
| 162 | } |
| 163 | return std::nullopt; |
| 164 | } |
| 165 | |
| 166 | std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( |
| 167 | IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, |
| 168 | bool &KnownBitsComputed) { |
| 169 | // Handle target specific intrinsics |
| 170 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
| 171 | return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic( |
| 172 | IC&: *this, II, DemandedMask, Known, KnownBitsComputed); |
| 173 | } |
| 174 | return std::nullopt; |
| 175 | } |
| 176 | |
| 177 | std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( |
| 178 | IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, |
| 179 | APInt &PoisonElts2, APInt &PoisonElts3, |
| 180 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
| 181 | SimplifyAndSetOp) { |
| 182 | // Handle target specific intrinsics |
| 183 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
| 184 | return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic( |
| 185 | IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
| 186 | SimplifyAndSetOp); |
| 187 | } |
| 188 | return std::nullopt; |
| 189 | } |
| 190 | |
| 191 | bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
| 192 | // Approved exception for TTI use: This queries a legality property of the |
// target, not a profitability heuristic. Ideally this should be part of
| 194 | // DataLayout instead. |
| 195 | return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS); |
| 196 | } |
| 197 | |
| 198 | Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { |
| 199 | if (!RewriteGEP) |
| 200 | return llvm::emitGEPOffset(Builder: &Builder, DL, GEP); |
| 201 | |
| 202 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 203 | auto *Inst = dyn_cast<Instruction>(Val: GEP); |
| 204 | if (Inst) |
| 205 | Builder.SetInsertPoint(Inst); |
| 206 | |
| 207 | Value *Offset = EmitGEPOffset(GEP); |
| 208 | // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic. |
| 209 | if (Inst && !GEP->hasAllConstantIndices() && |
| 210 | !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) { |
| 211 | replaceInstUsesWith( |
| 212 | I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(), |
| 213 | IdxList: Offset, Name: "" , NW: GEP->getNoWrapFlags())); |
| 214 | eraseInstFromFunction(I&: *Inst); |
| 215 | } |
| 216 | return Offset; |
| 217 | } |
| 218 | |
| 219 | Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs, |
| 220 | GEPNoWrapFlags NW, Type *IdxTy, |
| 221 | bool RewriteGEPs) { |
| 222 | Value *Sum = nullptr; |
| 223 | for (GEPOperator *GEP : reverse(C&: GEPs)) { |
| 224 | Value *Offset = EmitGEPOffset(GEP, RewriteGEP: RewriteGEPs); |
| 225 | if (Offset->getType() != IdxTy) |
| 226 | Offset = Builder.CreateVectorSplat( |
| 227 | EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset); |
| 228 | if (Sum) |
| 229 | Sum = Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "" , HasNUW: NW.hasNoUnsignedWrap(), |
| 230 | HasNSW: NW.isInBounds()); |
| 231 | else |
| 232 | Sum = Offset; |
| 233 | } |
| 234 | if (!Sum) |
| 235 | return Constant::getNullValue(Ty: IdxTy); |
| 236 | return Sum; |
| 237 | } |
| 238 | |
| 239 | /// Legal integers and common types are considered desirable. This is used to |
/// avoid creating instructions with types that may not be supported well by
/// the backend.
| 242 | /// NOTE: This treats i8, i16 and i32 specially because they are common |
| 243 | /// types in frontend languages. |
| 244 | bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { |
| 245 | switch (BitWidth) { |
| 246 | case 8: |
| 247 | case 16: |
| 248 | case 32: |
| 249 | return true; |
| 250 | default: |
| 251 | return DL.isLegalInteger(Width: BitWidth); |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | /// Return true if it is desirable to convert an integer computation from a |
| 256 | /// given bit width to a new bit width. |
| 257 | /// We don't want to convert from a legal or desirable type (like i8) to an |
| 258 | /// illegal type or from a smaller to a larger illegal type. A width of '1' |
| 259 | /// is always treated as a desirable type because i1 is a fundamental type in |
| 260 | /// IR, and there are many specialized optimizations for i1 types. |
| 261 | /// Common/desirable widths are equally treated as legal to convert to, in |
| 262 | /// order to open up more combining opportunities. |
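///
/// For illustration (assuming a typical DataLayout with native widths
/// n8:16:32:64): i33 -> i32 is allowed (shrinking to a desirable width),
/// i32 -> i33 is rejected (legal/desirable to illegal), and i128 -> i256 is
/// rejected (growing an illegal type).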
| 263 | bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, |
| 264 | unsigned ToWidth) const { |
| 265 | bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth); |
| 266 | bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth); |
| 267 | |
| 268 | // Convert to desirable widths even if they are not legal types. |
| 269 | // Only shrink types, to prevent infinite loops. |
| 270 | if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth)) |
| 271 | return true; |
| 272 | |
// If this is a legal or desirable integer 'from' type, and the result would be
| 274 | // an illegal type, don't do the transformation. |
| 275 | if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal) |
| 276 | return false; |
| 277 | |
| 278 | // Otherwise, if both are illegal, do not increase the size of the result. We |
| 279 | // do allow things like i160 -> i64, but not i64 -> i160. |
| 280 | if (!FromLegal && !ToLegal && ToWidth > FromWidth) |
| 281 | return false; |
| 282 | |
| 283 | return true; |
| 284 | } |
| 285 | |
| 286 | /// Return true if it is desirable to convert a computation from 'From' to 'To'. |
| 287 | /// We don't want to convert from a legal to an illegal type or from a smaller |
| 288 | /// to a larger illegal type. i1 is always treated as a legal type because it is |
| 289 | /// a fundamental type in IR, and there are many specialized optimizations for |
| 290 | /// i1 types. |
| 291 | bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { |
| 292 | // TODO: This could be extended to allow vectors. Datalayout changes might be |
| 293 | // needed to properly support that. |
| 294 | if (!From->isIntegerTy() || !To->isIntegerTy()) |
| 295 | return false; |
| 296 | |
| 297 | unsigned FromWidth = From->getPrimitiveSizeInBits(); |
| 298 | unsigned ToWidth = To->getPrimitiveSizeInBits(); |
| 299 | return shouldChangeType(FromWidth, ToWidth); |
| 300 | } |
| 301 | |
// Return true if No Signed Wrap should be maintained for I.
| 303 | // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", |
| 304 | // where both B and C should be ConstantInts, results in a constant that does |
| 305 | // not overflow. This function only handles the Add/Sub/Mul opcodes. For |
| 306 | // all other opcodes, the function conservatively returns false. |
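// For example (illustrative constants only): when reassociating
// "(X +nsw 1) +nsw 2", the new "X +nsw 3" may keep nsw because 1 + 2 does not
// overflow; with "(X +nsw 1) +nsw INT_MAX" the constant fold overflows, so nsw
// must be dropped.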
| 307 | static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { |
| 308 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
| 309 | if (!OBO || !OBO->hasNoSignedWrap()) |
| 310 | return false; |
| 311 | |
| 312 | const APInt *BVal, *CVal; |
| 313 | if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal))) |
| 314 | return false; |
| 315 | |
| 316 | // We reason about Add/Sub/Mul Only. |
| 317 | bool Overflow = false; |
| 318 | switch (I.getOpcode()) { |
| 319 | case Instruction::Add: |
| 320 | (void)BVal->sadd_ov(RHS: *CVal, Overflow); |
| 321 | break; |
| 322 | case Instruction::Sub: |
| 323 | (void)BVal->ssub_ov(RHS: *CVal, Overflow); |
| 324 | break; |
| 325 | case Instruction::Mul: |
| 326 | (void)BVal->smul_ov(RHS: *CVal, Overflow); |
| 327 | break; |
| 328 | default: |
| 329 | // Conservatively return false for other opcodes. |
| 330 | return false; |
| 331 | } |
| 332 | return !Overflow; |
| 333 | } |
| 334 | |
| 335 | static bool hasNoUnsignedWrap(BinaryOperator &I) { |
| 336 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
| 337 | return OBO && OBO->hasNoUnsignedWrap(); |
| 338 | } |
| 339 | |
| 340 | static bool hasNoSignedWrap(BinaryOperator &I) { |
| 341 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
| 342 | return OBO && OBO->hasNoSignedWrap(); |
| 343 | } |
| 344 | |
| 345 | /// Conservatively clears subclassOptionalData after a reassociation or |
/// commutation. Fast-math flags are the exception: they remain valid across
/// these transforms, so they are saved and restored.
| 348 | static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { |
| 349 | FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I); |
| 350 | if (!FPMO) { |
| 351 | I.clearSubclassOptionalData(); |
| 352 | return; |
| 353 | } |
| 354 | |
| 355 | FastMathFlags FMF = I.getFastMathFlags(); |
| 356 | I.clearSubclassOptionalData(); |
| 357 | I.setFastMathFlags(FMF); |
| 358 | } |
| 359 | |
| 360 | /// Combine constant operands of associative operations either before or after a |
| 361 | /// cast to eliminate one of the associative operations: |
| 362 | /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) |
| 363 | /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) |
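///
/// For illustration (hypothetical IR; the checks below restrict this to a
/// one-use zext feeding a bitwise logic op):
///   %a = and i8 %x, 7
///   %z = zext i8 %a to i32
///   %r = and i32 %z, 300
/// can become
///   %z = zext i8 %x to i32
///   %r = and i32 %z, 4        ; 300 & zext(7) == 4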
| 364 | static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, |
| 365 | InstCombinerImpl &IC) { |
| 366 | auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0)); |
| 367 | if (!Cast || !Cast->hasOneUse()) |
| 368 | return false; |
| 369 | |
| 370 | // TODO: Enhance logic for other casts and remove this check. |
| 371 | auto CastOpcode = Cast->getOpcode(); |
| 372 | if (CastOpcode != Instruction::ZExt) |
| 373 | return false; |
| 374 | |
| 375 | // TODO: Enhance logic for other BinOps and remove this check. |
| 376 | if (!BinOp1->isBitwiseLogicOp()) |
| 377 | return false; |
| 378 | |
| 379 | auto AssocOpcode = BinOp1->getOpcode(); |
| 380 | auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0)); |
| 381 | if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) |
| 382 | return false; |
| 383 | |
| 384 | Constant *C1, *C2; |
| 385 | if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) || |
| 386 | !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2))) |
| 387 | return false; |
| 388 | |
| 389 | // TODO: This assumes a zext cast. |
| 390 | // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 |
| 391 | // to the destination type might lose bits. |
| 392 | |
| 393 | // Fold the constants together in the destination type: |
| 394 | // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) |
| 395 | const DataLayout &DL = IC.getDataLayout(); |
| 396 | Type *DestTy = C1->getType(); |
| 397 | Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL); |
| 398 | if (!CastC2) |
| 399 | return false; |
| 400 | Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL); |
| 401 | if (!FoldedC) |
| 402 | return false; |
| 403 | |
| 404 | IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0)); |
| 405 | IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC); |
| 406 | BinOp1->dropPoisonGeneratingFlags(); |
| 407 | Cast->dropPoisonGeneratingFlags(); |
| 408 | return true; |
| 409 | } |
| 410 | |
| 411 | // Simplifies IntToPtr/PtrToInt RoundTrip Cast. |
| 412 | // inttoptr ( ptrtoint (x) ) --> x |
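// For example (illustrative, assuming equal pointer/integer widths and
// matching address spaces, as checked below):
//   %i = ptrtoint ptr %p to i64
//   %q = inttoptr i64 %i to ptr
// simplifies to just %p.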
| 413 | Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { |
| 414 | auto *IntToPtr = dyn_cast<IntToPtrInst>(Val); |
| 415 | if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) == |
| 416 | DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) { |
| 417 | auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0)); |
| 418 | Type *CastTy = IntToPtr->getDestTy(); |
| 419 | if (PtrToInt && |
| 420 | CastTy->getPointerAddressSpace() == |
| 421 | PtrToInt->getSrcTy()->getPointerAddressSpace() && |
| 422 | DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) == |
| 423 | DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy())) |
| 424 | return PtrToInt->getOperand(i_nocapture: 0); |
| 425 | } |
| 426 | return nullptr; |
| 427 | } |
| 428 | |
| 429 | /// This performs a few simplifications for operators that are associative or |
| 430 | /// commutative: |
| 431 | /// |
| 432 | /// Commutative operators: |
| 433 | /// |
| 434 | /// 1. Order operands such that they are listed from right (least complex) to |
| 435 | /// left (most complex). This puts constants before unary operators before |
| 436 | /// binary operators. |
| 437 | /// |
| 438 | /// Associative operators: |
| 439 | /// |
| 440 | /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
| 441 | /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
| 442 | /// |
| 443 | /// Associative and commutative operators: |
| 444 | /// |
| 445 | /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
| 446 | /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
| 447 | /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
| 448 | /// if C1 and C2 are constants. |
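///
/// For illustration of transform 6 (hypothetical IR):
///   %t = add i32 %a, 3
///   %u = add i32 %b, 5
///   %r = add i32 %t, %u
/// can be rewritten as "(%a + %b) + 8".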
| 449 | bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { |
| 450 | Instruction::BinaryOps Opcode = I.getOpcode(); |
| 451 | bool Changed = false; |
| 452 | |
| 453 | do { |
| 454 | // Order operands such that they are listed from right (least complex) to |
| 455 | // left (most complex). This puts constants before unary operators before |
| 456 | // binary operators. |
| 457 | if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) < |
| 458 | getComplexity(V: I.getOperand(i_nocapture: 1))) |
| 459 | Changed = !I.swapOperands(); |
| 460 | |
| 461 | if (I.isCommutative()) { |
| 462 | if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) { |
| 463 | replaceOperand(I, OpNum: 0, V: Pair->first); |
| 464 | replaceOperand(I, OpNum: 1, V: Pair->second); |
| 465 | Changed = true; |
| 466 | } |
| 467 | } |
| 468 | |
| 469 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0)); |
| 470 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1)); |
| 471 | |
| 472 | if (I.isAssociative()) { |
| 473 | // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
| 474 | if (Op0 && Op0->getOpcode() == Opcode) { |
| 475 | Value *A = Op0->getOperand(i_nocapture: 0); |
| 476 | Value *B = Op0->getOperand(i_nocapture: 1); |
| 477 | Value *C = I.getOperand(i_nocapture: 1); |
| 478 | |
| 479 | // Does "B op C" simplify? |
| 480 | if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) { |
| 481 | // It simplifies to V. Form "A op V". |
| 482 | replaceOperand(I, OpNum: 0, V: A); |
| 483 | replaceOperand(I, OpNum: 1, V); |
| 484 | bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0); |
| 485 | bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0); |
| 486 | |
| 487 | // Conservatively clear all optional flags since they may not be |
| 488 | // preserved by the reassociation. Reset nsw/nuw based on the above |
| 489 | // analysis. |
| 490 | ClearSubclassDataAfterReassociation(I); |
| 491 | |
| 492 | // Note: this is only valid because SimplifyBinOp doesn't look at |
| 493 | // the operands to Op0. |
| 494 | if (IsNUW) |
| 495 | I.setHasNoUnsignedWrap(true); |
| 496 | |
| 497 | if (IsNSW) |
| 498 | I.setHasNoSignedWrap(true); |
| 499 | |
| 500 | Changed = true; |
| 501 | ++NumReassoc; |
| 502 | continue; |
| 503 | } |
| 504 | } |
| 505 | |
| 506 | // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
| 507 | if (Op1 && Op1->getOpcode() == Opcode) { |
| 508 | Value *A = I.getOperand(i_nocapture: 0); |
| 509 | Value *B = Op1->getOperand(i_nocapture: 0); |
| 510 | Value *C = Op1->getOperand(i_nocapture: 1); |
| 511 | |
| 512 | // Does "A op B" simplify? |
| 513 | if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) { |
| 514 | // It simplifies to V. Form "V op C". |
| 515 | replaceOperand(I, OpNum: 0, V); |
| 516 | replaceOperand(I, OpNum: 1, V: C); |
| 517 | // Conservatively clear the optional flags, since they may not be |
| 518 | // preserved by the reassociation. |
| 519 | ClearSubclassDataAfterReassociation(I); |
| 520 | Changed = true; |
| 521 | ++NumReassoc; |
| 522 | continue; |
| 523 | } |
| 524 | } |
| 525 | } |
| 526 | |
| 527 | if (I.isAssociative() && I.isCommutative()) { |
| 528 | if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) { |
| 529 | Changed = true; |
| 530 | ++NumReassoc; |
| 531 | continue; |
| 532 | } |
| 533 | |
| 534 | // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
| 535 | if (Op0 && Op0->getOpcode() == Opcode) { |
| 536 | Value *A = Op0->getOperand(i_nocapture: 0); |
| 537 | Value *B = Op0->getOperand(i_nocapture: 1); |
| 538 | Value *C = I.getOperand(i_nocapture: 1); |
| 539 | |
| 540 | // Does "C op A" simplify? |
| 541 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
| 542 | // It simplifies to V. Form "V op B". |
| 543 | replaceOperand(I, OpNum: 0, V); |
| 544 | replaceOperand(I, OpNum: 1, V: B); |
| 545 | // Conservatively clear the optional flags, since they may not be |
| 546 | // preserved by the reassociation. |
| 547 | ClearSubclassDataAfterReassociation(I); |
| 548 | Changed = true; |
| 549 | ++NumReassoc; |
| 550 | continue; |
| 551 | } |
| 552 | } |
| 553 | |
| 554 | // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
| 555 | if (Op1 && Op1->getOpcode() == Opcode) { |
| 556 | Value *A = I.getOperand(i_nocapture: 0); |
| 557 | Value *B = Op1->getOperand(i_nocapture: 0); |
| 558 | Value *C = Op1->getOperand(i_nocapture: 1); |
| 559 | |
| 560 | // Does "C op A" simplify? |
| 561 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
| 562 | // It simplifies to V. Form "B op V". |
| 563 | replaceOperand(I, OpNum: 0, V: B); |
| 564 | replaceOperand(I, OpNum: 1, V); |
| 565 | // Conservatively clear the optional flags, since they may not be |
| 566 | // preserved by the reassociation. |
| 567 | ClearSubclassDataAfterReassociation(I); |
| 568 | Changed = true; |
| 569 | ++NumReassoc; |
| 570 | continue; |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
| 575 | // if C1 and C2 are constants. |
| 576 | Value *A, *B; |
| 577 | Constant *C1, *C2, *CRes; |
| 578 | if (Op0 && Op1 && |
| 579 | Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && |
| 580 | match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) && |
| 581 | match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) && |
| 582 | (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) { |
| 583 | bool IsNUW = hasNoUnsignedWrap(I) && |
| 584 | hasNoUnsignedWrap(I&: *Op0) && |
| 585 | hasNoUnsignedWrap(I&: *Op1); |
| 586 | BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? |
| 587 | BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) : |
| 588 | BinaryOperator::Create(Op: Opcode, S1: A, S2: B); |
| 589 | |
| 590 | if (isa<FPMathOperator>(Val: NewBO)) { |
| 591 | FastMathFlags Flags = I.getFastMathFlags() & |
| 592 | Op0->getFastMathFlags() & |
| 593 | Op1->getFastMathFlags(); |
| 594 | NewBO->setFastMathFlags(Flags); |
| 595 | } |
| 596 | InsertNewInstWith(New: NewBO, Old: I.getIterator()); |
| 597 | NewBO->takeName(V: Op1); |
| 598 | replaceOperand(I, OpNum: 0, V: NewBO); |
| 599 | replaceOperand(I, OpNum: 1, V: CRes); |
| 600 | // Conservatively clear the optional flags, since they may not be |
| 601 | // preserved by the reassociation. |
| 602 | ClearSubclassDataAfterReassociation(I); |
| 603 | if (IsNUW) |
| 604 | I.setHasNoUnsignedWrap(true); |
| 605 | |
| 606 | Changed = true; |
| 607 | continue; |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | // No further simplifications. |
| 612 | return Changed; |
| 613 | } while (true); |
| 614 | } |
| 615 | |
| 616 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
| 617 | /// "(X LOp Y) ROp (X LOp Z)". |
| 618 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, |
| 619 | Instruction::BinaryOps ROp) { |
| 620 | // X & (Y | Z) <--> (X & Y) | (X & Z) |
| 621 | // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) |
| 622 | if (LOp == Instruction::And) |
| 623 | return ROp == Instruction::Or || ROp == Instruction::Xor; |
| 624 | |
| 625 | // X | (Y & Z) <--> (X | Y) & (X | Z) |
| 626 | if (LOp == Instruction::Or) |
| 627 | return ROp == Instruction::And; |
| 628 | |
| 629 | // X * (Y + Z) <--> (X * Y) + (X * Z) |
| 630 | // X * (Y - Z) <--> (X * Y) - (X * Z) |
| 631 | if (LOp == Instruction::Mul) |
| 632 | return ROp == Instruction::Add || ROp == Instruction::Sub; |
| 633 | |
| 634 | return false; |
| 635 | } |
| 636 | |
| 637 | /// Return whether "(X LOp Y) ROp Z" is always equal to |
| 638 | /// "(X ROp Z) LOp (Y ROp Z)". |
| 639 | static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, |
| 640 | Instruction::BinaryOps ROp) { |
| 641 | if (Instruction::isCommutative(Opcode: ROp)) |
| 642 | return leftDistributesOverRight(LOp: ROp, ROp: LOp); |
| 643 | |
| 644 | // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. |
| 645 | return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp); |
| 646 | |
| 647 | // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", |
| 648 | // but this requires knowing that the addition does not overflow and other |
| 649 | // such subtleties. |
| 650 | } |
| 651 | |
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like:
///   (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
| 654 | static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { |
| 655 | if (isa<Constant>(Val: V)) |
| 656 | return nullptr; |
| 657 | |
| 658 | return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType()); |
| 659 | } |
| 660 | |
| 661 | /// This function predicates factorization using distributive laws. By default, |
| 662 | /// it just returns the 'Op' inputs. But for special-cases like |
| 663 | /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add |
| 664 | /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to |
| 665 | /// allow more factorization opportunities. |
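///
/// For example (illustrative): given "add (shl %x, 5), (mul %x, 3)", the shl
/// operand is reported back as "mul %x, 32", which lets the caller factor the
/// whole expression as "mul %x, 35".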
| 666 | static Instruction::BinaryOps |
| 667 | getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, |
| 668 | Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { |
| 669 | assert(Op && "Expected a binary operator" ); |
| 670 | LHS = Op->getOperand(i_nocapture: 0); |
| 671 | RHS = Op->getOperand(i_nocapture: 1); |
| 672 | if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { |
| 673 | Constant *C; |
| 674 | if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) { |
| 675 | // X << C --> X * (1 << C) |
| 676 | RHS = ConstantFoldBinaryInstruction( |
| 677 | Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C); |
| 678 | assert(RHS && "Constant folding of immediate constants failed" ); |
| 679 | return Instruction::Mul; |
| 680 | } |
| 681 | // TODO: We can add other conversions e.g. shr => div etc. |
| 682 | } |
| 683 | if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) { |
| 684 | if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && |
| 685 | match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) { |
| 686 | // lshr nneg C, X --> ashr nneg C, X |
| 687 | return Instruction::AShr; |
| 688 | } |
| 689 | } |
| 690 | return Op->getOpcode(); |
| 691 | } |
| 692 | |
| 693 | /// This tries to simplify binary operations by factorizing out common terms |
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
| 695 | static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, |
| 696 | InstCombiner::BuilderTy &Builder, |
| 697 | Instruction::BinaryOps InnerOpcode, Value *A, |
| 698 | Value *B, Value *C, Value *D) { |
| 699 | assert(A && B && C && D && "All values must be provided" ); |
| 700 | |
| 701 | Value *V = nullptr; |
| 702 | Value *RetVal = nullptr; |
| 703 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 704 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
| 705 | |
| 706 | // Does "X op' Y" always equal "Y op' X"? |
| 707 | bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode); |
| 708 | |
| 709 | // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? |
| 710 | if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) { |
| 711 | // Does the instruction have the form "(A op' B) op (A op' D)" or, in the |
| 712 | // commutative case, "(A op' B) op (C op' A)"? |
| 713 | if (A == C || (InnerCommutative && A == D)) { |
| 714 | if (A != C) |
| 715 | std::swap(a&: C, b&: D); |
| 716 | // Consider forming "A op' (B op D)". |
| 717 | // If "B op D" simplifies then it can be formed with no cost. |
| 718 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I)); |
| 719 | |
| 720 | // If "B op D" doesn't simplify then only go on if one of the existing |
| 721 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
| 722 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
| 723 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName()); |
| 724 | if (V) |
| 725 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V); |
| 726 | } |
| 727 | } |
| 728 | |
| 729 | // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? |
| 730 | if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) { |
| 731 | // Does the instruction have the form "(A op' B) op (C op' B)" or, in the |
| 732 | // commutative case, "(A op' B) op (B op' D)"? |
| 733 | if (B == D || (InnerCommutative && B == C)) { |
| 734 | if (B != D) |
| 735 | std::swap(a&: C, b&: D); |
| 736 | // Consider forming "(A op C) op' B". |
| 737 | // If "A op C" simplifies then it can be formed with no cost. |
| 738 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I)); |
| 739 | |
| 740 | // If "A op C" doesn't simplify then only go on if one of the existing |
| 741 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
| 742 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
| 743 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName()); |
| 744 | if (V) |
| 745 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B); |
| 746 | } |
| 747 | } |
| 748 | |
| 749 | if (!RetVal) |
| 750 | return nullptr; |
| 751 | |
| 752 | ++NumFactor; |
| 753 | RetVal->takeName(V: &I); |
| 754 | |
| 755 | // Try to add no-overflow flags to the final value. |
| 756 | if (isa<BinaryOperator>(Val: RetVal)) { |
| 757 | bool HasNSW = false; |
| 758 | bool HasNUW = false; |
| 759 | if (isa<OverflowingBinaryOperator>(Val: &I)) { |
| 760 | HasNSW = I.hasNoSignedWrap(); |
| 761 | HasNUW = I.hasNoUnsignedWrap(); |
| 762 | } |
| 763 | if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) { |
| 764 | HasNSW &= LOBO->hasNoSignedWrap(); |
| 765 | HasNUW &= LOBO->hasNoUnsignedWrap(); |
| 766 | } |
| 767 | |
| 768 | if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) { |
| 769 | HasNSW &= ROBO->hasNoSignedWrap(); |
| 770 | HasNUW &= ROBO->hasNoUnsignedWrap(); |
| 771 | } |
| 772 | |
| 773 | if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { |
| 774 | // We can propagate 'nsw' if we know that |
| 775 | // %Y = mul nsw i16 %X, C |
| 776 | // %Z = add nsw i16 %Y, %X |
| 777 | // => |
| 778 | // %Z = mul nsw i16 %X, C+1 |
| 779 | // |
| 780 | // iff C+1 isn't INT_MIN |
| 781 | const APInt *CInt; |
| 782 | if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue()) |
| 783 | cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW); |
| 784 | |
| 785 | // nuw can be propagated with any constant or nuw value. |
| 786 | cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW); |
| 787 | } |
| 788 | } |
| 789 | return RetVal; |
| 790 | } |
| 791 | |
| 792 | // If `I` has one Const operand and the other matches `(ctpop (not x))`, |
| 793 | // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. |
// This is only useful if the new subtract can fold, so we only handle the
// following cases:
// 1) (add/sub/disjoint_or C, (ctpop (not x)))
//    -> (add/sub/disjoint_or C', (ctpop x))
// 2) (cmp pred C, (ctpop (not x)))
//    -> (cmp pred C', (ctpop x))
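// For illustration with i8 (hypothetical values): "add (ctpop (xor %x, -1)), 5"
// can become "sub 13, (ctpop %x)", because ctpop(~x) == 8 - ctpop(x).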
| 800 | Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { |
| 801 | unsigned Opc = I->getOpcode(); |
| 802 | unsigned ConstIdx = 1; |
| 803 | switch (Opc) { |
| 804 | default: |
| 805 | return nullptr; |
// (ctpop (not x)) <-> (sub nuw nsw BitWidth(x), (ctpop x))
// We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
// is constant.
| 809 | case Instruction::Sub: |
| 810 | ConstIdx = 0; |
| 811 | break; |
| 812 | case Instruction::ICmp: |
// Signed predicates aren't correct in some edge cases (e.g. for i2 types).
// Furthermore, since (ctpop x) is known to be in [0, BitWidth(x)], almost all
// signed comparisons against it are simplified to unsigned ones anyway.
| 816 | if (cast<ICmpInst>(Val: I)->isSigned()) |
| 817 | return nullptr; |
| 818 | break; |
| 819 | case Instruction::Or: |
| 820 | if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value()))) |
| 821 | return nullptr; |
| 822 | [[fallthrough]]; |
| 823 | case Instruction::Add: |
| 824 | break; |
| 825 | } |
| 826 | |
| 827 | Value *Op; |
| 828 | // Find ctpop. |
| 829 | if (!match(V: I->getOperand(i: 1 - ConstIdx), |
| 830 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op))))) |
| 831 | return nullptr; |
| 832 | |
| 833 | Constant *C; |
| 834 | // Check other operand is ImmConstant. |
| 835 | if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C))) |
| 836 | return nullptr; |
| 837 | |
| 838 | Type *Ty = Op->getType(); |
| 839 | Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits()); |
| 840 | // Need extra check for icmp. Note if this check is true, it generally means |
| 841 | // the icmp will simplify to true/false. |
| 842 | if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) { |
| 843 | Constant *Cmp = |
| 844 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL); |
| 845 | if (!Cmp || !Cmp->isZeroValue()) |
| 846 | return nullptr; |
| 847 | } |
| 848 | |
| 849 | // Check we can invert `(not x)` for free. |
| 850 | bool Consumes = false; |
| 851 | if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes) |
| 852 | return nullptr; |
| 853 | Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder); |
| 854 | assert(NotOp != nullptr && |
| 855 | "Desync between isFreeToInvert and getFreelyInverted" ); |
| 856 | |
| 857 | Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp); |
| 858 | |
| 859 | Value *R = nullptr; |
| 860 | |
| 861 | // Do the transformation here to avoid potentially introducing an infinite |
| 862 | // loop. |
| 863 | switch (Opc) { |
| 864 | case Instruction::Sub: |
| 865 | R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC)); |
| 866 | break; |
| 867 | case Instruction::Or: |
| 868 | case Instruction::Add: |
| 869 | R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp); |
| 870 | break; |
| 871 | case Instruction::ICmp: |
| 872 | R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(), |
| 873 | LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C)); |
| 874 | break; |
| 875 | default: |
| 876 | llvm_unreachable("Unhandled Opcode" ); |
| 877 | } |
| 878 | assert(R != nullptr); |
| 879 | return replaceInstUsesWith(I&: *I, V: R); |
| 880 | } |
| 881 | |
| 882 | // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) |
| 883 | // IFF |
| 884 | // 1) the logic_shifts match |
// 2) either BinOp1 is `and`, or
//    the binop/shift pair is completely distributable (anything but
//    `add` + `lshr`) and either BinOp2 is `and` or
//    (logic_shift (inv_logic_shift C1, C), C) == C1
| 888 | // |
| 889 | // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) |
| 890 | // |
| 891 | // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) |
| 892 | // IFF |
| 893 | // 1) the logic_shifts match |
| 894 | // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). |
| 895 | // |
| 896 | // -> (BinOp (logic_shift (BinOp X, Y)), Mask) |
| 897 | // |
| 898 | // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) |
| 899 | // IFF |
| 900 | // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` |
| 901 | // 2) Binop2 is `not` |
| 902 | // |
| 903 | // -> (arithmetic_shift Binop1((not X), Y), Amt) |
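//
// For illustration of the first pattern (hypothetical i32 IR):
//   "or (and (shl %x, 4), 240), (shl %y, 4)"
// can become
//   "shl (or (and %x, 15), %y), 4"
// because lshr(240, 4) == 15 and shl(15, 4) == 240.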
| 904 | |
| 905 | Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { |
| 906 | const DataLayout &DL = I.getDataLayout(); |
| 907 | auto IsValidBinOpc = [](unsigned Opc) { |
| 908 | switch (Opc) { |
| 909 | default: |
| 910 | return false; |
| 911 | case Instruction::And: |
| 912 | case Instruction::Or: |
| 913 | case Instruction::Xor: |
| 914 | case Instruction::Add: |
| 915 | // Skip Sub as we only match constant masks which will canonicalize to use |
| 916 | // add. |
| 917 | return true; |
| 918 | } |
| 919 | }; |
| 920 | |
| 921 | // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra |
| 922 | // constraints. |
| 923 | auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, |
| 924 | unsigned ShOpc) { |
| 925 | assert(ShOpc != Instruction::AShr); |
| 926 | return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || |
| 927 | ShOpc == Instruction::Shl; |
| 928 | }; |
| 929 | |
| 930 | auto GetInvShift = [](unsigned ShOpc) { |
| 931 | assert(ShOpc != Instruction::AShr); |
| 932 | return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; |
| 933 | }; |
| 934 | |
| 935 | auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, |
| 936 | unsigned ShOpc, Constant *CMask, |
| 937 | Constant *CShift) { |
| 938 | // If the BinOp1 is `and` we don't need to check the mask. |
| 939 | if (BinOpc1 == Instruction::And) |
| 940 | return true; |
| 941 | |
// For all other possible transforms we need a completely distributable
// binop/shift pair (anything but `add` + `lshr`).
| 944 | if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) |
| 945 | return false; |
| 946 | |
| 947 | // If BinOp2 is `and`, any mask works (this only really helps for non-splat |
| 948 | // vecs, otherwise the mask will be simplified and the following check will |
| 949 | // handle it). |
| 950 | if (BinOpc2 == Instruction::And) |
| 951 | return true; |
| 952 | |
| 953 | // Otherwise, need mask that meets the below requirement. |
| 954 | // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask |
| 955 | Constant *MaskInvShift = |
| 956 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
| 957 | return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) == |
| 958 | CMask; |
| 959 | }; |
| 960 | |
| 961 | auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { |
| 962 | Constant *CMask, *CShift; |
| 963 | Value *X, *Y, *ShiftedX, *Mask, *Shift; |
| 964 | if (!match(V: I.getOperand(i_nocapture: ShOpnum), |
| 965 | P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift))))) |
| 966 | return nullptr; |
| 967 | if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum), |
| 968 | P: m_c_BinOp(L: m_CombineAnd( |
| 969 | L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))), |
| 970 | R: m_Value(V&: ShiftedX)), |
| 971 | R: m_Value(V&: Mask)))) |
| 972 | return nullptr; |
| 973 | // Make sure we are matching instruction shifts and not ConstantExpr |
| 974 | auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum)); |
| 975 | auto *IX = dyn_cast<Instruction>(Val: ShiftedX); |
| 976 | if (!IY || !IX) |
| 977 | return nullptr; |
| 978 | |
| 979 | // LHS and RHS need same shift opcode |
| 980 | unsigned ShOpc = IY->getOpcode(); |
| 981 | if (ShOpc != IX->getOpcode()) |
| 982 | return nullptr; |
| 983 | |
| 984 | // Make sure binop is real instruction and not ConstantExpr |
| 985 | auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum)); |
| 986 | if (!BO2) |
| 987 | return nullptr; |
| 988 | |
| 989 | unsigned BinOpc = BO2->getOpcode(); |
| 990 | // Make sure we have valid binops. |
| 991 | if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) |
| 992 | return nullptr; |
| 993 | |
| 994 | if (ShOpc == Instruction::AShr) { |
| 995 | if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) && |
| 996 | BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) { |
| 997 | Value *NotX = Builder.CreateNot(V: X); |
| 998 | Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX); |
| 999 | return BinaryOperator::Create( |
| 1000 | Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift); |
| 1001 | } |
| 1002 | |
| 1003 | return nullptr; |
| 1004 | } |
| 1005 | |
// If BinOp1 == BinOp2 and it is either a bitwise op or `shl` paired with
// `add`, then just distribute to drop the shift, regardless of the constants.
| 1008 | if (BinOpc == I.getOpcode() && |
| 1009 | IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { |
| 1010 | Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y); |
| 1011 | Value *NewBinOp1 = Builder.CreateBinOp( |
| 1012 | Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift); |
| 1013 | return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask); |
| 1014 | } |
| 1015 | |
| 1016 | // Otherwise we can only distribute by constant shifting the mask, so |
| 1017 | // ensure we have constants. |
| 1018 | if (!match(V: Shift, P: m_ImmConstant(C&: CShift))) |
| 1019 | return nullptr; |
| 1020 | if (!match(V: Mask, P: m_ImmConstant(C&: CMask))) |
| 1021 | return nullptr; |
| 1022 | |
| 1023 | // Check if we can distribute the binops. |
| 1024 | if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) |
| 1025 | return nullptr; |
| 1026 | |
| 1027 | Constant *NewCMask = |
| 1028 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
| 1029 | Value *NewBinOp2 = Builder.CreateBinOp( |
| 1030 | Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask); |
| 1031 | Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2); |
| 1032 | return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc), |
| 1033 | S1: NewBinOp1, S2: CShift); |
| 1034 | }; |
| 1035 | |
| 1036 | if (Instruction *R = MatchBinOp(0)) |
| 1037 | return R; |
| 1038 | return MatchBinOp(1); |
| 1039 | } |
| 1040 | |
| 1041 | // (Binop (zext C), (select C, T, F)) |
| 1042 | // -> (select C, (binop 1, T), (binop 0, F)) |
| 1043 | // |
| 1044 | // (Binop (sext C), (select C, T, F)) |
| 1045 | // -> (select C, (binop -1, T), (binop 0, F)) |
| 1046 | // |
| 1047 | // Attempt to simplify binary operations into a select with folded args, when |
| 1048 | // one operand of the binop is a select instruction and the other operand is a |
// zext/sext whose source operand is the select condition.
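// For example (illustrative): "add (zext i1 %c to i32), (select %c, %t, %f)"
// can become "select %c, (add 1, %t), (add 0, %f)", whose arms then fold
// further.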
| 1050 | Instruction * |
| 1051 | InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { |
| 1052 | // TODO: this simplification may be extended to any speculatable instruction, |
| 1053 | // not just binops, and would possibly be handled better in FoldOpIntoSelect. |
| 1054 | Instruction::BinaryOps Opc = I.getOpcode(); |
| 1055 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 1056 | Value *A, *CondVal, *TrueVal, *FalseVal; |
| 1057 | Value *CastOp; |
| 1058 | |
| 1059 | auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { |
| 1060 | return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) && |
| 1061 | A->getType()->getScalarSizeInBits() == 1 && |
| 1062 | match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal), |
| 1063 | R: m_Value(V&: FalseVal))); |
| 1064 | }; |
| 1065 | |
| 1066 | // Make sure one side of the binop is a select instruction, and the other is a |
// zero/sign extension operating on an i1.
| 1068 | if (MatchSelectAndCast(LHS, RHS)) |
| 1069 | CastOp = LHS; |
| 1070 | else if (MatchSelectAndCast(RHS, LHS)) |
| 1071 | CastOp = RHS; |
| 1072 | else |
| 1073 | return nullptr; |
| 1074 | |
| 1075 | auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { |
| 1076 | bool IsCastOpRHS = (CastOp == RHS); |
| 1077 | bool IsZExt = isa<ZExtInst>(Val: CastOp); |
| 1078 | Constant *C; |
| 1079 | |
| 1080 | if (IsTrueArm) { |
| 1081 | C = Constant::getNullValue(Ty: V->getType()); |
| 1082 | } else if (IsZExt) { |
| 1083 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
| 1084 | C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1)); |
| 1085 | } else { |
| 1086 | C = Constant::getAllOnesValue(Ty: V->getType()); |
| 1087 | } |
| 1088 | |
| 1089 | return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C) |
| 1090 | : Builder.CreateBinOp(Opc, LHS: C, RHS: V); |
| 1091 | }; |
| 1092 | |
| 1093 | // If the value used in the zext/sext is the select condition, or the negated |
| 1094 | // of the select condition, the binop can be simplified. |
| 1095 | if (CondVal == A) { |
| 1096 | Value *NewTrueVal = NewFoldedConst(false, TrueVal); |
| 1097 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
| 1098 | S2: NewFoldedConst(true, FalseVal)); |
| 1099 | } |
| 1100 | |
| 1101 | if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) { |
| 1102 | Value *NewTrueVal = NewFoldedConst(true, TrueVal); |
| 1103 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
| 1104 | S2: NewFoldedConst(false, FalseVal)); |
| 1105 | } |
| 1106 | |
| 1107 | return nullptr; |
| 1108 | } |
| 1109 | |
| 1110 | Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { |
| 1111 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 1112 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
| 1113 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
| 1114 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
| 1115 | Value *A, *B, *C, *D; |
| 1116 | Instruction::BinaryOps LHSOpcode, RHSOpcode; |
| 1117 | |
| 1118 | if (Op0) |
| 1119 | LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1); |
| 1120 | if (Op1) |
| 1121 | RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0); |
| 1122 | |
| 1123 | // The instruction has the form "(A op' B) op (C op' D)". Try to factorize |
| 1124 | // a common term. |
| 1125 | if (Op0 && Op1 && LHSOpcode == RHSOpcode) |
| 1126 | if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D)) |
| 1127 | return V; |
| 1128 | |
// The instruction has the form "(A op' B) op (C)". Try to factorize out a
// common term.
| 1131 | if (Op0) |
| 1132 | if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS)) |
| 1133 | if (Value *V = |
| 1134 | tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident)) |
| 1135 | return V; |
| 1136 | |
// The instruction has the form "(B) op (C op' D)". Try to factorize out a
// common term.
| 1139 | if (Op1) |
| 1140 | if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS)) |
| 1141 | if (Value *V = |
| 1142 | tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D)) |
| 1143 | return V; |
| 1144 | |
| 1145 | return nullptr; |
| 1146 | } |
| 1147 | |
| 1148 | /// This tries to simplify binary operations which some other binary operation |
/// distributes over, either by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or by expanding out if this results in
/// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
| 1152 | /// Returns the simplified value, or null if it didn't simplify. |
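///
/// As an illustration of the expansion direction (hypothetical IR):
/// "(X & -2) | 1" expands to "(X | 1) & (-2 | 1)"; since "-2 | 1" folds to -1,
/// the identity for 'and', the whole expression becomes "X | 1".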
| 1153 | Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { |
| 1154 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 1155 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
| 1156 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
| 1157 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
| 1158 | |
| 1159 | // Factorization. |
| 1160 | if (Value *R = tryFactorizationFolds(I)) |
| 1161 | return R; |
| 1162 | |
| 1163 | // Expansion. |
| 1164 | if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) { |
| 1165 | // The instruction has the form "(A op' B) op C". See if expanding it out |
| 1166 | // to "(A op C) op' (B op C)" results in simplifications. |
| 1167 | Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS; |
| 1168 | Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' |
| 1169 | |
| 1170 | // Disable the use of undef because it's not safe to distribute undef. |
| 1171 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
| 1172 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
| 1173 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive); |
| 1174 | |
| 1175 | // Do "A op C" and "B op C" both simplify? |
| 1176 | if (L && R) { |
| 1177 | // They do! Return "L op' R". |
| 1178 | ++NumExpand; |
| 1179 | C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
| 1180 | C->takeName(V: &I); |
| 1181 | return C; |
| 1182 | } |
| 1183 | |
| 1184 | // Does "A op C" simplify to the identity value for the inner opcode? |
| 1185 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
| 1186 | // They do! Return "B op C". |
| 1187 | ++NumExpand; |
| 1188 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C); |
| 1189 | C->takeName(V: &I); |
| 1190 | return C; |
| 1191 | } |
| 1192 | |
| 1193 | // Does "B op C" simplify to the identity value for the inner opcode? |
| 1194 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
| 1195 | // They do! Return "A op C". |
| 1196 | ++NumExpand; |
| 1197 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
| 1198 | C->takeName(V: &I); |
| 1199 | return C; |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) { |
| 1204 | // The instruction has the form "A op (B op' C)". See if expanding it out |
| 1205 | // to "(A op B) op' (A op C)" results in simplifications. |
| 1206 | Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1); |
| 1207 | Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' |
| 1208 | |
| 1209 | // Disable the use of undef because it's not safe to distribute undef. |
| 1210 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
| 1211 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive); |
| 1212 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
| 1213 | |
| 1214 | // Do "A op B" and "A op C" both simplify? |
| 1215 | if (L && R) { |
| 1216 | // They do! Return "L op' R". |
| 1217 | ++NumExpand; |
| 1218 | A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
| 1219 | A->takeName(V: &I); |
| 1220 | return A; |
| 1221 | } |
| 1222 | |
| 1223 | // Does "A op B" simplify to the identity value for the inner opcode? |
| 1224 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
| 1225 | // They do! Return "A op C". |
| 1226 | ++NumExpand; |
| 1227 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
| 1228 | A->takeName(V: &I); |
| 1229 | return A; |
| 1230 | } |
| 1231 | |
| 1232 | // Does "A op C" simplify to the identity value for the inner opcode? |
| 1233 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
| 1234 | // They do! Return "A op B". |
| 1235 | ++NumExpand; |
| 1236 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B); |
| 1237 | A->takeName(V: &I); |
| 1238 | return A; |
| 1239 | } |
| 1240 | } |
| 1241 | |
| 1242 | return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); |
| 1243 | } |
| 1244 | |
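// Match a pair of PHI nodes in the same block whose incoming values are, for
// every predecessor, the same pair of values (possibly swapped). Illustrative
// IR (names invented for this comment):
//   %lhs = phi i32 [ %x, %bb0 ], [ %y, %bb1 ]
//   %rhs = phi i32 [ %y, %bb0 ], [ %x, %bb1 ]
// matches and returns the pair (%x, %y).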
| 1245 | static std::optional<std::pair<Value *, Value *>> |
| 1246 | matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { |
| 1247 | if (LHS->getParent() != RHS->getParent()) |
| 1248 | return std::nullopt; |
| 1249 | |
| 1250 | if (LHS->getNumIncomingValues() < 2) |
| 1251 | return std::nullopt; |
| 1252 | |
| 1253 | if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks())) |
| 1254 | return std::nullopt; |
| 1255 | |
| 1256 | Value *L0 = LHS->getIncomingValue(i: 0); |
| 1257 | Value *R0 = RHS->getIncomingValue(i: 0); |
| 1258 | |
| 1259 | for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { |
| 1260 | Value *L1 = LHS->getIncomingValue(i: I); |
| 1261 | Value *R1 = RHS->getIncomingValue(i: I); |
| 1262 | |
| 1263 | if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) |
| 1264 | continue; |
| 1265 | |
| 1266 | return std::nullopt; |
| 1267 | } |
| 1268 | |
| 1269 | return std::optional(std::pair(L0, R0)); |
| 1270 | } |
| 1271 | |
| 1272 | std::optional<std::pair<Value *, Value *>> |
| 1273 | InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { |
| 1274 | Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS); |
| 1275 | Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS); |
| 1276 | if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) |
| 1277 | return std::nullopt; |
| 1278 | switch (LHSInst->getOpcode()) { |
| 1279 | case Instruction::PHI: |
| 1280 | return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS)); |
| 1281 | case Instruction::Select: { |
| 1282 | Value *Cond = LHSInst->getOperand(i: 0); |
| 1283 | Value *TrueVal = LHSInst->getOperand(i: 1); |
| 1284 | Value *FalseVal = LHSInst->getOperand(i: 2); |
| 1285 | if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) && |
| 1286 | FalseVal == RHSInst->getOperand(i: 1)) |
| 1287 | return std::pair(TrueVal, FalseVal); |
| 1288 | return std::nullopt; |
| 1289 | } |
| 1290 | case Instruction::Call: { |
| 1291 | // Match min(a, b) and max(a, b) |
| 1292 | MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst); |
| 1293 | MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst); |
| 1294 | if (LHSMinMax && RHSMinMax && |
| 1295 | LHSMinMax->getPredicate() == |
| 1296 | ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) && |
| 1297 | ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && |
| 1298 | LHSMinMax->getRHS() == RHSMinMax->getRHS()) || |
| 1299 | (LHSMinMax->getLHS() == RHSMinMax->getRHS() && |
| 1300 | LHSMinMax->getRHS() == RHSMinMax->getLHS()))) |
| 1301 | return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); |
| 1302 | return std::nullopt; |
| 1303 | } |
| 1304 | default: |
| 1305 | return std::nullopt; |
| 1306 | } |
| 1307 | } |
| 1308 | |
| 1309 | Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, |
| 1310 | Value *LHS, |
| 1311 | Value *RHS) { |
| 1312 | Value *A, *B, *C, *D, *E, *F; |
| 1313 | bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C))); |
| 1314 | bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F))); |
| 1315 | if (!LHSIsSelect && !RHSIsSelect) |
| 1316 | return nullptr; |
| 1317 | |
| 1318 | FastMathFlags FMF; |
| 1319 | BuilderTy::FastMathFlagGuard Guard(Builder); |
| 1320 | if (isa<FPMathOperator>(Val: &I)) { |
| 1321 | FMF = I.getFastMathFlags(); |
| 1322 | Builder.setFastMathFlags(FMF); |
| 1323 | } |
| 1324 | |
| 1325 | Instruction::BinaryOps Opcode = I.getOpcode(); |
| 1326 | SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
| 1327 | |
| 1328 | Value *Cond, *True = nullptr, *False = nullptr; |
| 1329 | |
| 1330 | // Special-case for add/negate combination. Replace the zero in the negation |
| 1331 | // with the trailing add operand: |
| 1332 | // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) |
| 1333 | // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False |
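// Illustrative IR (made up for this comment), for the first form where
// "TVal + Z" has already simplified to some value %true:
//   %neg = sub i32 0, %n
//   %sel = select i1 %cond, i32 %tval, i32 %neg
//   %add = add i32 %sel, %z
// becomes:
//   %sub = sub i32 %z, %n
//   %add = select i1 %cond, i32 %true, i32 %sub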
| 1334 | auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { |
| 1335 | // We need an 'add' and exactly 1 arm of the select to have been simplified. |
| 1336 | if (Opcode != Instruction::Add || (!True && !False) || (True && False)) |
| 1337 | return nullptr; |
| 1338 | |
| 1339 | Value *N; |
| 1340 | if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) { |
| 1341 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
| 1342 | return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName()); |
| 1343 | } |
| 1344 | if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) { |
| 1345 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
| 1346 | return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName()); |
| 1347 | } |
| 1348 | return nullptr; |
| 1349 | }; |
| 1350 | |
| 1351 | if (LHSIsSelect && RHSIsSelect && A == D) { |
| 1352 | // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) |
| 1353 | Cond = A; |
| 1354 | True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q); |
| 1355 | False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q); |
| 1356 | |
| 1357 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
| 1358 | if (False && !True) |
| 1359 | True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E); |
| 1360 | else if (True && !False) |
| 1361 | False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F); |
| 1362 | } |
| 1363 | } else if (LHSIsSelect && LHS->hasOneUse()) { |
| 1364 | // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) |
| 1365 | Cond = A; |
| 1366 | True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q); |
| 1367 | False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q); |
| 1368 | if (Value *NewSel = foldAddNegate(B, C, RHS)) |
| 1369 | return NewSel; |
| 1370 | } else if (RHSIsSelect && RHS->hasOneUse()) { |
| 1371 | // X op (D ? E : F) -> D ? (X op E) : (X op F) |
| 1372 | Cond = D; |
| 1373 | True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q); |
| 1374 | False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q); |
| 1375 | if (Value *NewSel = foldAddNegate(E, F, LHS)) |
| 1376 | return NewSel; |
| 1377 | } |
| 1378 | |
| 1379 | if (!True || !False) |
| 1380 | return nullptr; |
| 1381 | |
| 1382 | Value *SI = Builder.CreateSelect(C: Cond, True, False); |
| 1383 | SI->takeName(V: &I); |
| 1384 | return SI; |
| 1385 | } |
| 1386 | |
/// Freely adapt every user of V as if V were changed to !V.
| 1388 | /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. |
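/// For example (illustrative): if V is only used as the condition of a select
/// and of a conditional branch, inverting V amounts to swapping the select's
/// arms and swapping the branch's successors (including profile metadata); a
/// 'not' user (xor with true/-1) has its uses rewired to the value itself.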
| 1389 | void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { |
| 1390 | assert(!isa<Constant>(I) && "Shouldn't invert users of constant" ); |
| 1391 | for (User *U : make_early_inc_range(Range: I->users())) { |
| 1392 | if (U == IgnoredUser) |
| 1393 | continue; // Don't consider this user. |
| 1394 | switch (cast<Instruction>(Val: U)->getOpcode()) { |
| 1395 | case Instruction::Select: { |
| 1396 | auto *SI = cast<SelectInst>(Val: U); |
| 1397 | SI->swapValues(); |
| 1398 | SI->swapProfMetadata(); |
| 1399 | break; |
| 1400 | } |
| 1401 | case Instruction::Br: { |
| 1402 | BranchInst *BI = cast<BranchInst>(Val: U); |
| 1403 | BI->swapSuccessors(); // swaps prof metadata too |
| 1404 | if (BPI) |
| 1405 | BPI->swapSuccEdgesProbabilities(Src: BI->getParent()); |
| 1406 | break; |
| 1407 | } |
| 1408 | case Instruction::Xor: |
| 1409 | replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I); |
| 1410 | // Add to worklist for DCE. |
| 1411 | addToWorklist(I: cast<Instruction>(Val: U)); |
| 1412 | break; |
| 1413 | default: |
| 1414 | llvm_unreachable("Got unexpected user - out of sync with " |
| 1415 | "canFreelyInvertAllUsersOf() ?" ); |
| 1416 | } |
| 1417 | } |
| 1418 | |
| 1419 | // Update pre-existing debug value uses. |
| 1420 | SmallVector<DbgValueInst *, 4> DbgValues; |
| 1421 | SmallVector<DbgVariableRecord *, 4> DbgVariableRecords; |
| 1422 | llvm::findDbgValues(DbgValues, V: I, DbgVariableRecords: &DbgVariableRecords); |
| 1423 | |
| 1424 | auto InvertDbgValueUse = [&](auto *DbgVal) { |
| 1425 | SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not}; |
| 1426 | for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps(); |
| 1427 | Idx != End; ++Idx) |
| 1428 | if (DbgVal->getVariableLocationOp(Idx) == I) |
| 1429 | DbgVal->setExpression( |
| 1430 | DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx)); |
| 1431 | }; |
| 1432 | |
| 1433 | for (DbgValueInst *DVI : DbgValues) |
| 1434 | InvertDbgValueUse(DVI); |
| 1435 | |
| 1436 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
| 1437 | InvertDbgValueUse(DVR); |
| 1438 | } |
| 1439 | |
/// Given a 'sub' instruction, return its RHS if the LHS is a constant zero
/// (the 'negate' form). Negatable integer constants, splats, and constant
/// vectors are handled by constant folding.
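/// Illustrative examples: for "%neg = sub i32 0, %x" this returns %x; for the
/// constant i32 5 it returns i32 -5.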
| 1442 | Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { |
| 1443 | Value *NegV; |
| 1444 | if (match(V, P: m_Neg(V: m_Value(V&: NegV)))) |
| 1445 | return NegV; |
| 1446 | |
| 1447 | // Constants can be considered to be negated values if they can be folded. |
| 1448 | if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V)) |
| 1449 | return ConstantExpr::getNeg(C); |
| 1450 | |
| 1451 | if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V)) |
| 1452 | if (C->getType()->getElementType()->isIntegerTy()) |
| 1453 | return ConstantExpr::getNeg(C); |
| 1454 | |
| 1455 | if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) { |
| 1456 | for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
| 1457 | Constant *Elt = CV->getAggregateElement(Elt: i); |
| 1458 | if (!Elt) |
| 1459 | return nullptr; |
| 1460 | |
| 1461 | if (isa<UndefValue>(Val: Elt)) |
| 1462 | continue; |
| 1463 | |
| 1464 | if (!isa<ConstantInt>(Val: Elt)) |
| 1465 | return nullptr; |
| 1466 | } |
| 1467 | return ConstantExpr::getNeg(C: CV); |
| 1468 | } |
| 1469 | |
| 1470 | // Negate integer vector splats. |
| 1471 | if (auto *CV = dyn_cast<Constant>(Val: V)) |
| 1472 | if (CV->getType()->isVectorTy() && |
| 1473 | CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) |
| 1474 | return ConstantExpr::getNeg(C: CV); |
| 1475 | |
| 1476 | return nullptr; |
| 1477 | } |
| 1478 | |
| 1479 | // Try to fold: |
| 1480 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
| 1481 | // -> ({s|u}itofp (int_binop x, y)) |
| 1482 | // 2) (fp_binop ({s|u}itofp x), FpC) |
| 1483 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
| 1484 | // |
| 1485 | // Assuming the sign of the cast for x/y is `OpsFromSigned`. |
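//
// Illustrative example (assuming the analysis can prove the integer add does
// not overflow):
//   %xf = sitofp i16 %x to float
//   %yf = sitofp i16 %y to float
//   %r  = fadd float %xf, %yf
// -->
//   %s  = add nsw i16 %x, %y
//   %r  = sitofp i16 %s to float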
| 1486 | Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( |
| 1487 | BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, |
| 1488 | Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { |
| 1489 | |
| 1490 | Type *FPTy = BO.getType(); |
| 1491 | Type *IntTy = IntOps[0]->getType(); |
| 1492 | |
| 1493 | unsigned IntSz = IntTy->getScalarSizeInBits(); |
// This is the maximum number of in-use bits in the integer for which the
// int -> fp casts are exact.
| 1496 | unsigned MaxRepresentableBits = |
| 1497 | APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); |
| 1498 | |
// Preserve the known number of leading bits. This can let us do the nsw/nuw
// checks trivially later on.
| 1501 | unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; |
| 1502 | |
// NB: This only comes up if OpsFromSigned is true, so there is no need to
// cache it between calls to `foldFBinOpOfIntCastsFromSign`.
| 1505 | auto IsNonZero = [&](unsigned OpNo) -> bool { |
| 1506 | if (OpsKnown[OpNo].hasKnownBits() && |
| 1507 | OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero()) |
| 1508 | return true; |
| 1509 | return isKnownNonZero(V: IntOps[OpNo], Q: SQ); |
| 1510 | }; |
| 1511 | |
| 1512 | auto IsNonNeg = [&](unsigned OpNo) -> bool { |
// NB: This matches the implementation in ValueTracking; we just try to use
// cached known bits here. If we ever start supporting WithCache for
// `isKnownNonNegative`, change this to an explicit call.
| 1516 | return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative(); |
| 1517 | }; |
| 1518 | |
| 1519 | // Check if we know for certain that ({s|u}itofp op) is exact. |
| 1520 | auto IsValidPromotion = [&](unsigned OpNo) -> bool { |
| 1521 | // Can we treat this operand as the desired sign? |
| 1522 | if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) && |
| 1523 | !IsNonNeg(OpNo)) |
| 1524 | return false; |
| 1525 | |
// If fp precision >= bitwidth(op) then it's exact.
// NB: This is slightly conservative for `sitofp`. For signed conversion, we
// can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
// handled specially. We can't, however, increase the bound arbitrarily for
// `sitofp`, as for larger sizes it won't sign extend.
| 1531 | if (MaxRepresentableBits < IntSz) { |
// Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
// numSignBits(op).
| 1534 | // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change |
| 1535 | // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`. |
| 1536 | if (OpsFromSigned) |
| 1537 | NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]); |
// Finally, for unsigned casts, check that fp precision >= bitwidth(op) -
// numLeadingZeros(op).
| 1540 | else { |
| 1541 | NumUsedLeadingBits[OpNo] = |
| 1542 | IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros(); |
| 1543 | } |
| 1544 | } |
// NB: We could also check if op is known to be a power of 2 or zero (which
// will always be representable). It's unlikely, however, that if we are
// unable to bound op in any way, we will be able to pass the overflow checks
// later on.
| 1549 | |
| 1550 | if (MaxRepresentableBits < NumUsedLeadingBits[OpNo]) |
| 1551 | return false; |
| 1552 | // Signed + Mul also requires that op is non-zero to avoid -0 cases. |
| 1553 | return !OpsFromSigned || BO.getOpcode() != Instruction::FMul || |
| 1554 | IsNonZero(OpNo); |
| 1555 | }; |
| 1556 | |
| 1557 | // If we have a constant rhs, see if we can losslessly convert it to an int. |
| 1558 | if (Op1FpC != nullptr) { |
// Signed + Mul requires a non-zero constant.
| 1560 | if (OpsFromSigned && BO.getOpcode() == Instruction::FMul && |
| 1561 | !match(V: Op1FpC, P: m_NonZeroFP())) |
| 1562 | return nullptr; |
| 1563 | |
| 1564 | Constant *Op1IntC = ConstantFoldCastOperand( |
| 1565 | Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC, |
| 1566 | DestTy: IntTy, DL); |
| 1567 | if (Op1IntC == nullptr) |
| 1568 | return nullptr; |
| 1569 | if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP |
| 1570 | : Instruction::UIToFP, |
| 1571 | C: Op1IntC, DestTy: FPTy, DL) != Op1FpC) |
| 1572 | return nullptr; |
| 1573 | |
| 1574 | // First try to keep sign of cast the same. |
| 1575 | IntOps[1] = Op1IntC; |
| 1576 | } |
| 1577 | |
| 1578 | // Ensure lhs/rhs integer types match. |
| 1579 | if (IntTy != IntOps[1]->getType()) |
| 1580 | return nullptr; |
| 1581 | |
| 1582 | if (Op1FpC == nullptr) { |
| 1583 | if (!IsValidPromotion(1)) |
| 1584 | return nullptr; |
| 1585 | } |
| 1586 | if (!IsValidPromotion(0)) |
| 1587 | return nullptr; |
| 1588 | |
// Finally, we check that the integer version of the binop will not overflow.
| 1590 | BinaryOperator::BinaryOps IntOpc; |
| 1591 | // Because of the precision check, we can often rule out overflows. |
| 1592 | bool NeedsOverflowCheck = true; |
// Try to conservatively rule out overflow based on the precision checks
// already done.
| 1595 | unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1; |
| 1596 | unsigned OverflowMaxCurBits = |
| 1597 | std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]); |
| 1598 | bool OutputSigned = OpsFromSigned; |
| 1599 | switch (BO.getOpcode()) { |
| 1600 | case Instruction::FAdd: |
| 1601 | IntOpc = Instruction::Add; |
| 1602 | OverflowMaxOutputBits += OverflowMaxCurBits; |
| 1603 | break; |
| 1604 | case Instruction::FSub: |
| 1605 | IntOpc = Instruction::Sub; |
| 1606 | OverflowMaxOutputBits += OverflowMaxCurBits; |
| 1607 | break; |
| 1608 | case Instruction::FMul: |
| 1609 | IntOpc = Instruction::Mul; |
| 1610 | OverflowMaxOutputBits += OverflowMaxCurBits * 2; |
| 1611 | break; |
| 1612 | default: |
| 1613 | llvm_unreachable("Unsupported binop" ); |
| 1614 | } |
| 1615 | // The precision check may have already ruled out overflow. |
| 1616 | if (OverflowMaxOutputBits < IntSz) { |
| 1617 | NeedsOverflowCheck = false; |
// We can bound the unsigned overflow from sub to an in-range signed value
// (this is what allows us to avoid the overflow check for sub).
| 1620 | if (IntOpc == Instruction::Sub) |
| 1621 | OutputSigned = true; |
| 1622 | } |
| 1623 | |
// The precision check did not rule out overflow, so we need to check.
| 1625 | // TODO: If we add support for `WithCache` in `willNotOverflow`, change |
| 1626 | // `IntOps[...]` arguments to `KnownOps[...]`. |
| 1627 | if (NeedsOverflowCheck && |
| 1628 | !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned)) |
| 1629 | return nullptr; |
| 1630 | |
| 1631 | Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]); |
| 1632 | if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) { |
| 1633 | IntBO->setHasNoSignedWrap(OutputSigned); |
| 1634 | IntBO->setHasNoUnsignedWrap(!OutputSigned); |
| 1635 | } |
| 1636 | if (OutputSigned) |
| 1637 | return new SIToFPInst(IntBinOp, FPTy); |
| 1638 | return new UIToFPInst(IntBinOp, FPTy); |
| 1639 | } |
| 1640 | |
| 1641 | // Try to fold: |
| 1642 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
| 1643 | // -> ({s|u}itofp (int_binop x, y)) |
| 1644 | // 2) (fp_binop ({s|u}itofp x), FpC) |
| 1645 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
| 1646 | Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { |
| 1647 | std::array<Value *, 2> IntOps = {nullptr, nullptr}; |
| 1648 | Constant *Op1FpC = nullptr; |
| 1649 | // Check for: |
| 1650 | // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) |
| 1651 | // 2) (binop ({s|u}itofp x), FpC) |
| 1652 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) && |
| 1653 | !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0])))) |
| 1654 | return nullptr; |
| 1655 | |
| 1656 | if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) && |
| 1657 | !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) && |
| 1658 | !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1])))) |
| 1659 | return nullptr; |
| 1660 | |
// Cache KnownBits to potentially save repeated analysis.
| 1662 | SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; |
| 1663 | |
| 1664 | // Try treating x/y as coming from both `uitofp` and `sitofp`. There are |
| 1665 | // different constraints depending on the sign of the cast. |
| 1666 | // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. |
| 1667 | if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, |
| 1668 | IntOps, Op1FpC, OpsKnown)) |
| 1669 | return R; |
| 1670 | return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, |
| 1671 | Op1FpC, OpsKnown); |
| 1672 | } |
| 1673 | |
| 1674 | /// A binop with a constant operand and a sign-extended boolean operand may be |
| 1675 | /// converted into a select of constants by applying the binary operation to |
| 1676 | /// the constant with the two possible values of the extended boolean (0 or -1). |
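/// For example (illustrative IR):
///   %s = sext i1 %b to i32
///   %r = and i32 %s, 42
/// --> %r = select i1 %b, i32 42, i32 0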
| 1677 | Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { |
| 1678 | // TODO: Handle non-commutative binop (constant is operand 0). |
| 1679 | // TODO: Handle zext. |
| 1680 | // TODO: Peek through 'not' of cast. |
| 1681 | Value *BO0 = BO.getOperand(i_nocapture: 0); |
| 1682 | Value *BO1 = BO.getOperand(i_nocapture: 1); |
| 1683 | Value *X; |
| 1684 | Constant *C; |
| 1685 | if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) || |
| 1686 | !X->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
| 1687 | return nullptr; |
| 1688 | |
| 1689 | // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) |
| 1690 | Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType()); |
| 1691 | Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType()); |
| 1692 | Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C); |
| 1693 | Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C); |
| 1694 | return SelectInst::Create(C: X, S1: TVal, S2: FVal); |
| 1695 | } |
| 1696 | |
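// Try to simplify I with the given select arm substituted for SI. For example
// (illustrative), with "%sel = select i1 %c, i32 0, i32 %x" and
// "I = add i32 %sel, %y", the true arm gives "add i32 0, %y", which simplifies
// to %y. Additionally, if the select condition is "icmp eq %op, %v", operand
// %op may be replaced by %v on the true arm (and similarly for "icmp ne" on
// the false arm), provided %v is not undef/poison.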
| 1697 | static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
| 1698 | bool IsTrueArm) { |
| 1699 | SmallVector<Value *> Ops; |
| 1700 | for (Value *Op : I.operands()) { |
| 1701 | Value *V = nullptr; |
| 1702 | if (Op == SI) { |
| 1703 | V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue(); |
| 1704 | } else if (match(V: SI->getCondition(), |
| 1705 | P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ |
| 1706 | : ICmpInst::ICMP_NE, |
| 1707 | L: m_Specific(V: Op), R: m_Value(V))) && |
| 1708 | isGuaranteedNotToBeUndefOrPoison(V)) { |
| 1709 | // Pass |
| 1710 | } else { |
| 1711 | V = Op; |
| 1712 | } |
| 1713 | Ops.push_back(Elt: V); |
| 1714 | } |
| 1715 | |
| 1716 | return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout()); |
| 1717 | } |
| 1718 | |
| 1719 | static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
| 1720 | Value *NewOp, InstCombiner &IC) { |
| 1721 | Instruction *Clone = I.clone(); |
| 1722 | Clone->replaceUsesOfWith(From: SI, To: NewOp); |
| 1723 | Clone->dropUBImplyingAttrsAndMetadata(); |
| 1724 | IC.InsertNewInstBefore(New: Clone, Old: I.getIterator()); |
| 1725 | return Clone; |
| 1726 | } |
| 1727 | |
| 1728 | Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, |
| 1729 | bool FoldWithMultiUse) { |
// Don't modify shared select instructions unless FoldWithMultiUse is set.
| 1731 | if (!SI->hasOneUse() && !FoldWithMultiUse) |
| 1732 | return nullptr; |
| 1733 | |
| 1734 | Value *TV = SI->getTrueValue(); |
| 1735 | Value *FV = SI->getFalseValue(); |
| 1736 | |
| 1737 | // Bool selects with constant operands can be folded to logical ops. |
| 1738 | if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
| 1739 | return nullptr; |
| 1740 | |
// Avoid breaking the min/max reduction pattern,
// which is needed for vectorization later.
| 1743 | if (isa<MinMaxIntrinsic>(Val: &Op)) |
| 1744 | for (Value *IntrinOp : Op.operands()) |
| 1745 | if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp)) |
| 1746 | for (Value *PhiOp : PN->operands()) |
| 1747 | if (PhiOp == &Op) |
| 1748 | return nullptr; |
| 1749 | |
| 1750 | // Test if a FCmpInst instruction is used exclusively by a select as |
| 1751 | // part of a minimum or maximum operation. If so, refrain from doing |
| 1752 | // any other folding. This helps out other analyses which understand |
| 1753 | // non-obfuscated minimum and maximum idioms. And in this case, at |
| 1754 | // least one of the comparison operands has at least one user besides |
| 1755 | // the compare (the select), which would often largely negate the |
| 1756 | // benefit of folding anyway. |
| 1757 | if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) { |
| 1758 | if (CI->hasOneUse()) { |
| 1759 | Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1); |
| 1760 | if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) && |
| 1761 | !CI->isCommutative()) |
| 1762 | return nullptr; |
| 1763 | } |
| 1764 | } |
| 1765 | |
| 1766 | // Make sure that one of the select arms folds successfully. |
| 1767 | Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true); |
| 1768 | Value *NewFV = |
| 1769 | simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false); |
| 1770 | if (!NewTV && !NewFV) |
| 1771 | return nullptr; |
| 1772 | |
| 1773 | // Create an instruction for the arm that did not fold. |
| 1774 | if (!NewTV) |
| 1775 | NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this); |
| 1776 | if (!NewFV) |
| 1777 | NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this); |
| 1778 | return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "" , InsertBefore: nullptr, MDFrom: SI); |
| 1779 | } |
| 1780 | |
| 1781 | static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, |
| 1782 | Value *InValue, BasicBlock *InBB, |
| 1783 | const DataLayout &DL, |
| 1784 | const SimplifyQuery SQ) { |
| 1785 | // NB: It is a precondition of this transform that the operands be |
| 1786 | // phi translatable! |
| 1787 | SmallVector<Value *> Ops; |
| 1788 | for (Value *Op : I.operands()) { |
| 1789 | if (Op == PN) |
| 1790 | Ops.push_back(Elt: InValue); |
| 1791 | else |
| 1792 | Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB)); |
| 1793 | } |
| 1794 | |
| 1795 | // Don't consider the simplification successful if we get back a constant |
| 1796 | // expression. That's just an instruction in hiding. |
| 1797 | // Also reject the case where we simplify back to the phi node. We wouldn't |
| 1798 | // be able to remove it in that case. |
| 1799 | Value *NewVal = simplifyInstructionWithOperands( |
| 1800 | I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator())); |
| 1801 | if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr())) |
| 1802 | return NewVal; |
| 1803 | |
| 1804 | // Check if incoming PHI value can be replaced with constant |
| 1805 | // based on implied condition. |
| 1806 | BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
| 1807 | const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I); |
| 1808 | if (TerminatorBI && TerminatorBI->isConditional() && |
| 1809 | TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) { |
| 1810 | bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent(); |
| 1811 | std::optional<bool> ImpliedCond = isImpliedCondition( |
| 1812 | LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1], |
| 1813 | DL, LHSIsTrue); |
| 1814 | if (ImpliedCond) |
| 1815 | return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value()); |
| 1816 | } |
| 1817 | |
| 1818 | return nullptr; |
| 1819 | } |
| 1820 | |
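// Fold an instruction that uses a phi into the phi's incoming values when the
// per-predecessor results simplify. Illustrative example (names invented):
//   %p = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
//   %r = add i32 %p, 1
// can become
//   %r = phi i32 [ 1, %bb0 ], [ %x.plus.1, %bb1 ]
// where %x.plus.1 is a clone of the add placed at the end of %bb1 (at most one
// non-simplified incoming value is allowed, and only if its predecessor
// branches unconditionally into the phi block).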
| 1821 | Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN, |
| 1822 | bool AllowMultipleUses) { |
| 1823 | unsigned NumPHIValues = PN->getNumIncomingValues(); |
| 1824 | if (NumPHIValues == 0) |
| 1825 | return nullptr; |
| 1826 | |
| 1827 | // We normally only transform phis with a single use. However, if a PHI has |
| 1828 | // multiple uses and they are all the same operation, we can fold *all* of the |
| 1829 | // uses into the PHI. |
| 1830 | bool OneUse = PN->hasOneUse(); |
| 1831 | bool IdenticalUsers = false; |
| 1832 | if (!AllowMultipleUses && !OneUse) { |
| 1833 | // Walk the use list for the instruction, comparing them to I. |
| 1834 | for (User *U : PN->users()) { |
| 1835 | Instruction *UI = cast<Instruction>(Val: U); |
| 1836 | if (UI != &I && !I.isIdenticalTo(I: UI)) |
| 1837 | return nullptr; |
| 1838 | } |
| 1839 | // Otherwise, we can replace *all* users with the new PHI we form. |
| 1840 | IdenticalUsers = true; |
| 1841 | } |
| 1842 | |
| 1843 | // Check that all operands are phi-translatable. |
| 1844 | for (Value *Op : I.operands()) { |
| 1845 | if (Op == PN) |
| 1846 | continue; |
| 1847 | |
| 1848 | // Non-instructions never require phi-translation. |
| 1849 | auto *I = dyn_cast<Instruction>(Val: Op); |
| 1850 | if (!I) |
| 1851 | continue; |
| 1852 | |
| 1853 | // Phi-translate can handle phi nodes in the same block. |
| 1854 | if (isa<PHINode>(Val: I)) |
| 1855 | if (I->getParent() == PN->getParent()) |
| 1856 | continue; |
| 1857 | |
| 1858 | // Operand dominates the block, no phi-translation necessary. |
| 1859 | if (DT.dominates(Def: I, BB: PN->getParent())) |
| 1860 | continue; |
| 1861 | |
| 1862 | // Not phi-translatable, bail out. |
| 1863 | return nullptr; |
| 1864 | } |
| 1865 | |
| 1866 | // Check to see whether the instruction can be folded into each phi operand. |
| 1867 | // If there is one operand that does not fold, remember the BB it is in. |
| 1868 | SmallVector<Value *> NewPhiValues; |
| 1869 | SmallVector<unsigned int> OpsToMoveUseToIncomingBB; |
| 1870 | bool SeenNonSimplifiedInVal = false; |
| 1871 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
| 1872 | Value *InVal = PN->getIncomingValue(i); |
| 1873 | BasicBlock *InBB = PN->getIncomingBlock(i); |
| 1874 | |
| 1875 | if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) { |
| 1876 | NewPhiValues.push_back(Elt: NewVal); |
| 1877 | continue; |
| 1878 | } |
| 1879 | |
| 1880 | // Handle some cases that can't be fully simplified, but where we know that |
| 1881 | // the two instructions will fold into one. |
| 1882 | auto WillFold = [&]() { |
| 1883 | if (!InVal->hasUseList() || !InVal->hasOneUser()) |
| 1884 | return false; |
| 1885 | |
| 1886 | // icmp of ucmp/scmp with constant will fold to icmp. |
| 1887 | const APInt *Ignored; |
| 1888 | if (isa<CmpIntrinsic>(Val: InVal) && |
| 1889 | match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored)))) |
| 1890 | return true; |
| 1891 | |
| 1892 | // icmp eq zext(bool), 0 will fold to !bool. |
| 1893 | if (isa<ZExtInst>(Val: InVal) && |
| 1894 | cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) && |
| 1895 | match(V: &I, |
| 1896 | P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero()))) |
| 1897 | return true; |
| 1898 | |
| 1899 | return false; |
| 1900 | }; |
| 1901 | |
| 1902 | if (WillFold()) { |
| 1903 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
| 1904 | NewPhiValues.push_back(Elt: nullptr); |
| 1905 | continue; |
| 1906 | } |
| 1907 | |
| 1908 | if (!OneUse && !IdenticalUsers) |
| 1909 | return nullptr; |
| 1910 | |
| 1911 | if (SeenNonSimplifiedInVal) |
| 1912 | return nullptr; // More than one non-simplified value. |
| 1913 | SeenNonSimplifiedInVal = true; |
| 1914 | |
| 1915 | // If there is exactly one non-simplified value, we can insert a copy of the |
| 1916 | // operation in that block. However, if this is a critical edge, we would |
| 1917 | // be inserting the computation on some other paths (e.g. inside a loop). |
| 1918 | // Only do this if the pred block is unconditionally branching into the phi |
| 1919 | // block. Also, make sure that the pred block is not dead code. |
| 1920 | BranchInst *BI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
| 1921 | if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(A: InBB)) |
| 1922 | return nullptr; |
| 1923 | |
| 1924 | NewPhiValues.push_back(Elt: nullptr); |
| 1925 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
| 1926 | |
| 1927 | // If the InVal is an invoke at the end of the pred block, then we can't |
| 1928 | // insert a computation after it without breaking the edge. |
| 1929 | if (isa<InvokeInst>(Val: InVal)) |
| 1930 | if (cast<Instruction>(Val: InVal)->getParent() == InBB) |
| 1931 | return nullptr; |
| 1932 | |
| 1933 | // Do not push the operation across a loop backedge. This could result in |
| 1934 | // an infinite combine loop, and is generally non-profitable (especially |
| 1935 | // if the operation was originally outside the loop). |
| 1936 | if (isBackEdge(From: InBB, To: PN->getParent())) |
| 1937 | return nullptr; |
| 1938 | } |
| 1939 | |
| 1940 | // Clone the instruction that uses the phi node and move it into the incoming |
| 1941 | // BB because we know that the next iteration of InstCombine will simplify it. |
| 1942 | SmallDenseMap<BasicBlock *, Instruction *> Clones; |
| 1943 | for (auto OpIndex : OpsToMoveUseToIncomingBB) { |
| 1944 | Value *Op = PN->getIncomingValue(i: OpIndex); |
| 1945 | BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex); |
| 1946 | |
| 1947 | Instruction *Clone = Clones.lookup(Val: OpBB); |
| 1948 | if (!Clone) { |
| 1949 | Clone = I.clone(); |
| 1950 | for (Use &U : Clone->operands()) { |
| 1951 | if (U == PN) |
| 1952 | U = Op; |
| 1953 | else |
| 1954 | U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB); |
| 1955 | } |
| 1956 | Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator()); |
| 1957 | Clones.insert(KV: {OpBB, Clone}); |
| 1958 | } |
| 1959 | |
| 1960 | NewPhiValues[OpIndex] = Clone; |
| 1961 | } |
| 1962 | |
| 1963 | // Okay, we can do the transformation: create the new PHI node. |
| 1964 | PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues()); |
| 1965 | InsertNewInstBefore(New: NewPN, Old: PN->getIterator()); |
| 1966 | NewPN->takeName(V: PN); |
| 1967 | NewPN->setDebugLoc(PN->getDebugLoc()); |
| 1968 | |
| 1969 | for (unsigned i = 0; i != NumPHIValues; ++i) |
| 1970 | NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i)); |
| 1971 | |
| 1972 | if (IdenticalUsers) { |
| 1973 | for (User *U : make_early_inc_range(Range: PN->users())) { |
| 1974 | Instruction *User = cast<Instruction>(Val: U); |
| 1975 | if (User == &I) |
| 1976 | continue; |
| 1977 | replaceInstUsesWith(I&: *User, V: NewPN); |
| 1978 | eraseInstFromFunction(I&: *User); |
| 1979 | } |
| 1980 | OneUse = true; |
| 1981 | } |
| 1982 | |
| 1983 | if (OneUse) { |
| 1984 | replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN), |
| 1985 | To&: const_cast<PHINode &>(*NewPN), |
| 1986 | DomPoint&: const_cast<PHINode &>(*PN), DT); |
| 1987 | } |
| 1988 | return replaceInstUsesWith(I, V: NewPN); |
| 1989 | } |
| 1990 | |
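// Combine a binop whose operands are two interleaved simple recurrences that
// use the same opcode. Illustrative example:
//   %p0 = phi i32 [ 1, %entry ], [ %a0, %loop ]
//   %p1 = phi i32 [ 2, %entry ], [ %a1, %loop ]
//   %a0 = add i32 %p0, 3
//   %a1 = add i32 %p1, 4
//   %r  = add i32 %a0, %a1
// can be rewritten as a single recurrence:
//   %p = phi i32 [ 3, %entry ], [ %a, %loop ]
//   %a = add i32 %p, 7        ; replaces all uses of %r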
| 1991 | Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) { |
| 1992 | if (!BO.isAssociative()) |
| 1993 | return nullptr; |
| 1994 | |
| 1995 | // Find the interleaved binary ops. |
| 1996 | auto Opc = BO.getOpcode(); |
| 1997 | auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0)); |
| 1998 | auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1)); |
| 1999 | if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) || |
| 2000 | BO0->getOpcode() != Opc || BO1->getOpcode() != Opc || |
| 2001 | !BO0->isAssociative() || !BO1->isAssociative() || |
| 2002 | BO0->getParent() != BO1->getParent()) |
| 2003 | return nullptr; |
| 2004 | |
| 2005 | assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() && |
| 2006 | "Expected commutative instructions!" ); |
| 2007 | |
| 2008 | // Find the matching phis, forming the recurrences. |
| 2009 | PHINode *PN0, *PN1; |
| 2010 | Value *Start0, *Step0, *Start1, *Step1; |
| 2011 | if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() || |
| 2012 | !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() || |
| 2013 | PN0->getParent() != PN1->getParent()) |
| 2014 | return nullptr; |
| 2015 | |
| 2016 | assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 && |
| 2017 | "Expected PHIs with two incoming values!" ); |
| 2018 | |
| 2019 | // Convert the start and step values to constants. |
| 2020 | auto *Init0 = dyn_cast<Constant>(Val: Start0); |
| 2021 | auto *Init1 = dyn_cast<Constant>(Val: Start1); |
| 2022 | auto *C0 = dyn_cast<Constant>(Val: Step0); |
| 2023 | auto *C1 = dyn_cast<Constant>(Val: Step1); |
| 2024 | if (!Init0 || !Init1 || !C0 || !C1) |
| 2025 | return nullptr; |
| 2026 | |
| 2027 | // Fold the recurrence constants. |
| 2028 | auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1); |
| 2029 | auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1); |
| 2030 | if (!Init || !C) |
| 2031 | return nullptr; |
| 2032 | |
| 2033 | // Create the reduced PHI. |
| 2034 | auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(), |
| 2035 | NameStr: "reduced.phi" ); |
| 2036 | |
| 2037 | // Create the new binary op. |
| 2038 | auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C); |
| 2039 | if (Opc == Instruction::FAdd || Opc == Instruction::FMul) { |
// Intersect the fast-math flags for FAdd and FMul.
| 2041 | FastMathFlags Intersect = BO0->getFastMathFlags() & |
| 2042 | BO1->getFastMathFlags() & BO.getFastMathFlags(); |
| 2043 | NewBO->setFastMathFlags(Intersect); |
| 2044 | } else { |
| 2045 | OverflowTracking Flags; |
| 2046 | Flags.AllKnownNonNegative = false; |
| 2047 | Flags.AllKnownNonZero = false; |
| 2048 | Flags.mergeFlags(I&: *BO0); |
| 2049 | Flags.mergeFlags(I&: *BO1); |
| 2050 | Flags.mergeFlags(I&: BO); |
| 2051 | Flags.applyFlags(I&: *NewBO); |
| 2052 | } |
| 2053 | NewBO->takeName(V: &BO); |
| 2054 | |
| 2055 | for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) { |
| 2056 | auto *V = PN0->getIncomingValue(i: I); |
| 2057 | auto *BB = PN0->getIncomingBlock(i: I); |
| 2058 | if (V == Init0) { |
| 2059 | assert(((PN1->getIncomingValue(0) == Init1 && |
| 2060 | PN1->getIncomingBlock(0) == BB) || |
| 2061 | (PN1->getIncomingValue(1) == Init1 && |
| 2062 | PN1->getIncomingBlock(1) == BB)) && |
| 2063 | "Invalid incoming block!" ); |
| 2064 | NewPN->addIncoming(V: Init, BB); |
| 2065 | } else if (V == BO0) { |
| 2066 | assert(((PN1->getIncomingValue(0) == BO1 && |
| 2067 | PN1->getIncomingBlock(0) == BB) || |
| 2068 | (PN1->getIncomingValue(1) == BO1 && |
| 2069 | PN1->getIncomingBlock(1) == BB)) && |
| 2070 | "Invalid incoming block!" ); |
| 2071 | NewPN->addIncoming(V: NewBO, BB); |
| 2072 | } else |
| 2073 | llvm_unreachable("Unexpected incoming value!" ); |
| 2074 | } |
| 2075 | |
| 2076 | LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0 |
| 2077 | << "\n with " << *PN1 << "\n " << *BO1 |
| 2078 | << '\n'); |
| 2079 | |
| 2080 | // Insert the new recurrence and remove the old (dead) ones. |
| 2081 | InsertNewInstWith(New: NewPN, Old: PN0->getIterator()); |
| 2082 | InsertNewInstWith(New: NewBO, Old: BO0->getIterator()); |
| 2083 | |
| 2084 | eraseInstFromFunction( |
| 2085 | I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType()))); |
| 2086 | eraseInstFromFunction( |
| 2087 | I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType()))); |
| 2088 | eraseInstFromFunction(I&: *PN0); |
| 2089 | eraseInstFromFunction(I&: *PN1); |
| 2090 | |
| 2091 | return replaceInstUsesWith(I&: BO, V: NewBO); |
| 2092 | } |
| 2093 | |
| 2094 | Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { |
| 2095 | // Attempt to fold binary operators whose operands are simple recurrences. |
| 2096 | if (auto *NewBO = foldBinopWithRecurrence(BO)) |
| 2097 | return NewBO; |
| 2098 | |
| 2099 | // TODO: This should be similar to the incoming values check in foldOpIntoPhi: |
| 2100 | // we are guarding against replicating the binop in >1 predecessor. |
| 2101 | // This could miss matching a phi with 2 constant incoming values. |
| 2102 | auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0)); |
| 2103 | auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1)); |
| 2104 | if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || |
| 2105 | Phi0->getNumOperands() != Phi1->getNumOperands()) |
| 2106 | return nullptr; |
| 2107 | |
| 2108 | // TODO: Remove the restriction for binop being in the same block as the phis. |
| 2109 | if (BO.getParent() != Phi0->getParent() || |
| 2110 | BO.getParent() != Phi1->getParent()) |
| 2111 | return nullptr; |
| 2112 | |
// Fold if, for every predecessor block, one of phi0's or phi1's incoming
// values is the identity constant of this binary operator; the other
// incoming value from that block can then be used directly.
| 2116 | // For example: |
| 2117 | // %phi0 = phi i32 [0, %bb0], [%i, %bb1] |
| 2118 | // %phi1 = phi i32 [%j, %bb0], [0, %bb1] |
| 2119 | // %add = add i32 %phi0, %phi1 |
| 2120 | // ==> |
| 2121 | // %add = phi i32 [%j, %bb0], [%i, %bb1] |
| 2122 | Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(), |
| 2123 | /*AllowRHSConstant*/ false); |
| 2124 | if (C) { |
| 2125 | SmallVector<Value *, 4> NewIncomingValues; |
| 2126 | auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) { |
| 2127 | auto &Phi0Use = std::get<0>(t&: T); |
| 2128 | auto &Phi1Use = std::get<1>(t&: T); |
| 2129 | if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use)) |
| 2130 | return false; |
| 2131 | Value *Phi0UseV = Phi0Use.get(); |
| 2132 | Value *Phi1UseV = Phi1Use.get(); |
| 2133 | if (Phi0UseV == C) |
| 2134 | NewIncomingValues.push_back(Elt: Phi1UseV); |
| 2135 | else if (Phi1UseV == C) |
| 2136 | NewIncomingValues.push_back(Elt: Phi0UseV); |
| 2137 | else |
| 2138 | return false; |
| 2139 | return true; |
| 2140 | }; |
| 2141 | |
| 2142 | if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()), |
| 2143 | P: CanFoldIncomingValuePair)) { |
| 2144 | PHINode *NewPhi = |
| 2145 | PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands()); |
| 2146 | assert(NewIncomingValues.size() == Phi0->getNumOperands() && |
| 2147 | "The number of collected incoming values should equal the number " |
| 2148 | "of the original PHINode operands!" ); |
| 2149 | for (unsigned I = 0; I < Phi0->getNumOperands(); I++) |
| 2150 | NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I)); |
| 2151 | return NewPhi; |
| 2152 | } |
| 2153 | } |
| 2154 | |
| 2155 | if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) |
| 2156 | return nullptr; |
| 2157 | |
| 2158 | // Match a pair of incoming constants for one of the predecessor blocks. |
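// Illustrative example:
//   %phi0 = phi i32 [ 7, %const.bb ], [ %x, %other.bb ]
//   %phi1 = phi i32 [ 5, %const.bb ], [ %y, %other.bb ]
//   %r = add i32 %phi0, %phi1
// -->
//   ; "%xy = add i32 %x, %y" is created at the end of %other.bb
//   %r = phi i32 [ 12, %const.bb ], [ %xy, %other.bb ]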
| 2159 | BasicBlock *ConstBB, *OtherBB; |
| 2160 | Constant *C0, *C1; |
| 2161 | if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) { |
| 2162 | ConstBB = Phi0->getIncomingBlock(i: 0); |
| 2163 | OtherBB = Phi0->getIncomingBlock(i: 1); |
| 2164 | } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) { |
| 2165 | ConstBB = Phi0->getIncomingBlock(i: 1); |
| 2166 | OtherBB = Phi0->getIncomingBlock(i: 0); |
| 2167 | } else { |
| 2168 | return nullptr; |
| 2169 | } |
| 2170 | if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1))) |
| 2171 | return nullptr; |
| 2172 | |
| 2173 | // The block that we are hoisting to must reach here unconditionally. |
| 2174 | // Otherwise, we could be speculatively executing an expensive or |
| 2175 | // non-speculative op. |
| 2176 | auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator()); |
| 2177 | if (!PredBlockBranch || PredBlockBranch->isConditional() || |
| 2178 | !DT.isReachableFromEntry(A: OtherBB)) |
| 2179 | return nullptr; |
| 2180 | |
| 2181 | // TODO: This check could be tightened to only apply to binops (div/rem) that |
| 2182 | // are not safe to speculatively execute. But that could allow hoisting |
| 2183 | // potentially expensive instructions (fdiv for example). |
| 2184 | for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) |
| 2185 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter)) |
| 2186 | return nullptr; |
| 2187 | |
| 2188 | // Fold constants for the predecessor block with constant incoming values. |
| 2189 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL); |
| 2190 | if (!NewC) |
| 2191 | return nullptr; |
| 2192 | |
| 2193 | // Make a new binop in the predecessor block with the non-constant incoming |
| 2194 | // values. |
| 2195 | Builder.SetInsertPoint(PredBlockBranch); |
| 2196 | Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(), |
| 2197 | LHS: Phi0->getIncomingValueForBlock(BB: OtherBB), |
| 2198 | RHS: Phi1->getIncomingValueForBlock(BB: OtherBB)); |
| 2199 | if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
| 2200 | NotFoldedNewBO->copyIRFlags(V: &BO); |
| 2201 | |
| 2202 | // Replace the binop with a phi of the new values. The old phis are dead. |
| 2203 | PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2); |
| 2204 | NewPhi->addIncoming(V: NewBO, BB: OtherBB); |
| 2205 | NewPhi->addIncoming(V: NewC, BB: ConstBB); |
| 2206 | return NewPhi; |
| 2207 | } |
| 2208 | |
| 2209 | Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { |
| 2210 | if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1))) |
| 2211 | return nullptr; |
| 2212 | |
| 2213 | if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) { |
| 2214 | if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel)) |
| 2215 | return NewSel; |
| 2216 | } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) { |
| 2217 | if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) |
| 2218 | return NewPhi; |
| 2219 | } |
| 2220 | return nullptr; |
| 2221 | } |
| 2222 | |
| 2223 | static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { |
| 2224 | // If this GEP has only 0 indices, it is the same pointer as |
| 2225 | // Src. If Src is not a trivial GEP too, don't combine |
| 2226 | // the indices. |
| 2227 | if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && |
| 2228 | !Src.hasOneUse()) |
| 2229 | return false; |
| 2230 | return true; |
| 2231 | } |
| 2232 | |
| 2233 | /// Find a constant NewC that has property: |
| 2234 | /// shuffle(NewC, ShMask) = C |
| 2235 | /// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2> |
| 2236 | /// |
| 2237 | /// A 1-to-1 mapping is not required. Example: |
| 2238 | /// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison> |
| 2239 | Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, |
| 2240 | VectorType *NewCTy) { |
| 2241 | if (isa<ScalableVectorType>(Val: NewCTy)) { |
| 2242 | Constant *Splat = C->getSplatValue(); |
| 2243 | if (!Splat) |
| 2244 | return nullptr; |
| 2245 | return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat); |
| 2246 | } |
| 2247 | |
| 2248 | if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() > |
| 2249 | cast<FixedVectorType>(Val: C->getType())->getNumElements()) |
| 2250 | return nullptr; |
| 2251 | |
| 2252 | unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements(); |
| 2253 | PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType()); |
| 2254 | SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar); |
| 2255 | unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements(); |
| 2256 | for (unsigned I = 0; I < NumElts; ++I) { |
| 2257 | Constant *CElt = C->getAggregateElement(Elt: I); |
| 2258 | if (ShMask[I] >= 0) { |
| 2259 | assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle" ); |
| 2260 | Constant *NewCElt = NewVecC[ShMask[I]]; |
| 2261 | // Bail out if: |
| 2262 | // 1. The constant vector contains a constant expression. |
| 2263 | // 2. The shuffle needs an element of the constant vector that can't |
| 2264 | // be mapped to a new constant vector. |
| 2265 | // 3. This is a widening shuffle that copies elements of V1 into the |
| 2266 | // extended elements (extending with poison is allowed). |
| 2267 | if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) || |
| 2268 | I >= NewCNumElts) |
| 2269 | return nullptr; |
| 2270 | NewVecC[ShMask[I]] = CElt; |
| 2271 | } |
| 2272 | } |
| 2273 | return ConstantVector::get(V: NewVecC); |
| 2274 | } |
| 2275 | |
| 2276 | Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { |
| 2277 | if (!isa<VectorType>(Val: Inst.getType())) |
| 2278 | return nullptr; |
| 2279 | |
| 2280 | BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); |
| 2281 | Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1); |
| 2282 | assert(cast<VectorType>(LHS->getType())->getElementCount() == |
| 2283 | cast<VectorType>(Inst.getType())->getElementCount()); |
| 2284 | assert(cast<VectorType>(RHS->getType())->getElementCount() == |
| 2285 | cast<VectorType>(Inst.getType())->getElementCount()); |
| 2286 | |
| 2287 | // If both operands of the binop are vector concatenations, then perform the |
| 2288 | // narrow binop on each pair of the source operands followed by concatenation |
| 2289 | // of the results. |
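// Illustrative example with two <2 x i32> halves concatenated to <4 x i32>:
//   %l = shufflevector <2 x i32> %l0, <2 x i32> %l1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r = shufflevector <2 x i32> %r0, <2 x i32> %r1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %b = add <4 x i32> %l, %r
// -->
//   %b0 = add <2 x i32> %l0, %r0
//   %b1 = add <2 x i32> %l1, %r1
//   %b  = shufflevector <2 x i32> %b0, <2 x i32> %b1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>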
| 2290 | Value *L0, *L1, *R0, *R1; |
| 2291 | ArrayRef<int> Mask; |
| 2292 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) && |
| 2293 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) && |
| 2294 | LHS->hasOneUse() && RHS->hasOneUse() && |
| 2295 | cast<ShuffleVectorInst>(Val: LHS)->isConcat() && |
| 2296 | cast<ShuffleVectorInst>(Val: RHS)->isConcat()) { |
| 2297 | // This transform does not have the speculative execution constraint as |
| 2298 | // below because the shuffle is a concatenation. The new binops are |
| 2299 | // operating on exactly the same elements as the existing binop. |
| 2300 | // TODO: We could ease the mask requirement to allow different undef lanes, |
| 2301 | // but that requires an analysis of the binop-with-undef output value. |
| 2302 | Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0); |
| 2303 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0)) |
| 2304 | BO->copyIRFlags(V: &Inst); |
| 2305 | Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1); |
| 2306 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1)) |
| 2307 | BO->copyIRFlags(V: &Inst); |
| 2308 | return new ShuffleVectorInst(NewBO0, NewBO1, Mask); |
| 2309 | } |
| 2310 | |
| 2311 | auto createBinOpReverse = [&](Value *X, Value *Y) { |
| 2312 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
| 2313 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
| 2314 | BO->copyIRFlags(V: &Inst); |
| 2315 | Module *M = Inst.getModule(); |
| 2316 | Function *F = Intrinsic::getOrInsertDeclaration( |
| 2317 | M, id: Intrinsic::vector_reverse, Tys: V->getType()); |
| 2318 | return CallInst::Create(Func: F, Args: V); |
| 2319 | }; |
| 2320 | |
| 2321 | // NOTE: Reverse shuffles don't require the speculative execution protection |
| 2322 | // below because they don't affect which lanes take part in the computation. |
| 2323 | |
| 2324 | Value *V1, *V2; |
| 2325 | if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) { |
| 2326 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
| 2327 | if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) && |
| 2328 | (LHS->hasOneUse() || RHS->hasOneUse() || |
| 2329 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
| 2330 | return createBinOpReverse(V1, V2); |
| 2331 | |
// Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
| 2333 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
| 2334 | return createBinOpReverse(V1, RHS); |
| 2335 | } |
| 2336 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
| 2337 | else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2))))) |
| 2338 | return createBinOpReverse(LHS, V2); |
| 2339 | |
| 2340 | auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) { |
| 2341 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
| 2342 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
| 2343 | BO->copyIRFlags(V: &Inst); |
| 2344 | |
| 2345 | ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount(); |
| 2346 | Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue()); |
| 2347 | Module *M = Inst.getModule(); |
| 2348 | Function *F = Intrinsic::getOrInsertDeclaration( |
| 2349 | M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType()); |
| 2350 | return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL}); |
| 2351 | }; |
| 2352 | |
| 2353 | Value *EVL; |
| 2354 | if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
| 2355 | Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) { |
| 2356 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
| 2357 | if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
| 2358 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) && |
| 2359 | (LHS->hasOneUse() || RHS->hasOneUse() || |
| 2360 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
| 2361 | return createBinOpVPReverse(V1, V2, EVL); |
| 2362 | |
// Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
| 2364 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
| 2365 | return createBinOpVPReverse(V1, RHS, EVL); |
| 2366 | } |
| 2367 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
| 2368 | else if (isSplatValue(V: LHS) && |
| 2369 | match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
| 2370 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) |
| 2371 | return createBinOpVPReverse(LHS, V2, EVL); |
| 2372 | |
| 2373 | // It may not be safe to reorder shuffles and things like div, urem, etc. |
| 2374 | // because we may trap when executing those ops on unknown vector elements. |
| 2375 | // See PR20059. |
| 2376 | if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst)) |
| 2377 | return nullptr; |
| 2378 | |
| 2379 | auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) { |
| 2380 | Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
| 2381 | if (auto *BO = dyn_cast<BinaryOperator>(Val: XY)) |
| 2382 | BO->copyIRFlags(V: &Inst); |
| 2383 | return new ShuffleVectorInst(XY, M); |
| 2384 | }; |
| 2385 | |
| 2386 | // If both arguments of the binary operation are shuffles that use the same |
| 2387 | // mask and shuffle within a single vector, move the shuffle after the binop. |
| 2388 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) && |
| 2389 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
| 2390 | V1->getType() == V2->getType() && |
| 2391 | (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { |
| 2392 | // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) |
| 2393 | return createBinOpShuffle(V1, V2, Mask); |
| 2394 | } |
| 2395 | |
| 2396 | // If both arguments of a commutative binop are select-shuffles that use the |
| 2397 | // same mask with commuted operands, the shuffles are unnecessary. |
| 2398 | if (Inst.isCommutative() && |
| 2399 | match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) && |
| 2400 | match(V: RHS, |
| 2401 | P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) { |
| 2402 | auto *LShuf = cast<ShuffleVectorInst>(Val: LHS); |
| 2403 | auto *RShuf = cast<ShuffleVectorInst>(Val: RHS); |
| 2404 | // TODO: Allow shuffles that contain undefs in the mask? |
| 2405 | // That is legal, but it reduces undef knowledge. |
| 2406 | // TODO: Allow arbitrary shuffles by shuffling after binop? |
| 2407 | // That might be legal, but we have to deal with poison. |
| 2408 | if (LShuf->isSelect() && |
| 2409 | !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) && |
| 2410 | RShuf->isSelect() && |
| 2411 | !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) { |
| 2412 | // Example: |
| 2413 | // LHS = shuffle V1, V2, <0, 5, 6, 3> |
| 2414 | // RHS = shuffle V2, V1, <0, 5, 6, 3> |
| 2415 | // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 |
| 2416 | Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2); |
| 2417 | NewBO->copyIRFlags(V: &Inst); |
| 2418 | return NewBO; |
| 2419 | } |
| 2420 | } |
| 2421 | |
| 2422 | // If one argument is a shuffle within one vector and the other is a constant, |
| 2423 | // try moving the shuffle after the binary operation. This canonicalization |
| 2424 | // intends to move shuffles closer to other shuffles and binops closer to |
| 2425 | // other binops, so they can be folded. It may also enable demanded elements |
| 2426 | // transforms. |
| 2427 | Constant *C; |
| 2428 | if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), |
| 2429 | mask: m_Mask(Mask))), |
| 2430 | R: m_ImmConstant(C)))) { |
| 2431 | assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() && |
| 2432 | "Shuffle should not change scalar type" ); |
| 2433 | |
| 2434 | bool ConstOp1 = isa<Constant>(Val: RHS); |
| 2435 | if (Constant *NewC = |
| 2436 | unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) { |
| 2437 | // For fixed vectors, lanes of NewC not used by the shuffle will be poison,
| 2438 | // which would cause UB for div/rem. Mask them with a safe constant.
| 2439 | if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem()) |
| 2440 | NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1); |
| 2441 | |
| 2442 | // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) |
| 2443 | // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) |
| 2444 | Value *NewLHS = ConstOp1 ? V1 : NewC; |
| 2445 | Value *NewRHS = ConstOp1 ? NewC : V1; |
| 2446 | return createBinOpShuffle(NewLHS, NewRHS, Mask); |
| 2447 | } |
| 2448 | } |
| 2449 | |
| 2450 | // Try to reassociate to sink a splat shuffle after a binary operation. |
| 2451 | if (Inst.isAssociative() && Inst.isCommutative()) { |
| 2452 | // Canonicalize shuffle operand as LHS. |
| 2453 | if (isa<ShuffleVectorInst>(Val: RHS)) |
| 2454 | std::swap(a&: LHS, b&: RHS); |
| 2455 | |
| 2456 | Value *X; |
| 2457 | ArrayRef<int> MaskC; |
| 2458 | int SplatIndex; |
| 2459 | Value *Y, *OtherOp; |
| 2460 | if (!match(V: LHS, |
| 2461 | P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) || |
| 2462 | !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) || |
| 2463 | X->getType() != Inst.getType() || |
| 2464 | !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp))))) |
| 2465 | return nullptr; |
| 2466 | |
| 2467 | // FIXME: This may not be safe if the analysis allows undef elements. By |
| 2468 | // moving 'Y' before the splat shuffle, we are implicitly assuming |
| 2469 | // that it is not undef/poison at the splat index. |
| 2470 | if (isSplatValue(V: OtherOp, Index: SplatIndex)) { |
| 2471 | std::swap(a&: Y, b&: OtherOp); |
| 2472 | } else if (!isSplatValue(V: Y, Index: SplatIndex)) { |
| 2473 | return nullptr; |
| 2474 | } |
| 2475 | |
| 2476 | // X and Y are splatted values, so perform the binary operation on those
| 2477 | // values, splat the result, and then perform the 2nd binary operation:
| 2478 | // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp |
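|  | // For example (illustrative, assuming %y is itself a splat of its lane 0):
|  | //   %xs = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
|  | //   %t  = add <4 x i32> %y, %other
|  | //   %r  = add <4 x i32> %xs, %t
|  | // becomes
|  | //   %xy = add <4 x i32> %x, %y
|  | //   %s  = shufflevector <4 x i32> %xy, <4 x i32> poison, <4 x i32> zeroinitializer
|  | //   %r  = add <4 x i32> %s, %other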
| 2479 | Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
| 2480 | SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); |
| 2481 | Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask); |
| 2482 | Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp); |
| 2483 | |
| 2484 | // Intersect FMF on both new binops. Other (poison-generating) flags are |
| 2485 | // dropped to be safe. |
| 2486 | if (isa<FPMathOperator>(Val: R)) { |
| 2487 | R->copyFastMathFlags(I: &Inst); |
| 2488 | R->andIRFlags(V: RHS); |
| 2489 | } |
| 2490 | if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
| 2491 | NewInstBO->copyIRFlags(V: R); |
| 2492 | return R; |
| 2493 | } |
| 2494 | |
| 2495 | return nullptr; |
| 2496 | } |
| 2497 | |
| 2498 | /// Try to narrow the width of a binop if at least one operand is an extend
| 2499 | /// of a value. This requires a potentially expensive known-bits check to make
| 2500 | /// sure the narrow op does not overflow.
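|  | /// For example (illustrative), when %a + %b is known not to overflow in i8:
|  | ///   %xa = sext i8 %a to i32
|  | ///   %xb = sext i8 %b to i32
|  | ///   %r  = add i32 %xa, %xb
|  | /// can be rewritten as:
|  | ///   %n = add nsw i8 %a, %b
|  | ///   %r = sext i8 %n to i32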
| 2501 | Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { |
| 2502 | // We need at least one extended operand. |
| 2503 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1); |
| 2504 | |
| 2505 | // If this is a sub, we swap the operands since we always want an extension |
| 2506 | // on the RHS. The LHS can be an extension or a constant. |
| 2507 | if (BO.getOpcode() == Instruction::Sub) |
| 2508 | std::swap(a&: Op0, b&: Op1); |
| 2509 | |
| 2510 | Value *X; |
| 2511 | bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X))); |
| 2512 | if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X)))) |
| 2513 | return nullptr; |
| 2514 | |
| 2515 | // If both operands are the same extension from the same source type and we |
| 2516 | // can eliminate at least one (hasOneUse), this might work. |
| 2517 | CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; |
| 2518 | Value *Y; |
| 2519 | if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() && |
| 2520 | cast<Operator>(Val: Op1)->getOpcode() == CastOpc && |
| 2521 | (Op0->hasOneUse() || Op1->hasOneUse()))) { |
| 2522 | // If that did not match, see if we have a suitable constant operand. |
| 2523 | // Truncating and extending must produce the same constant. |
| 2524 | Constant *WideC; |
| 2525 | if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC))) |
| 2526 | return nullptr; |
| 2527 | Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc); |
| 2528 | if (!NarrowC) |
| 2529 | return nullptr; |
| 2530 | Y = NarrowC; |
| 2531 | } |
| 2532 | |
| 2533 | // Swap back now that we found our operands. |
| 2534 | if (BO.getOpcode() == Instruction::Sub) |
| 2535 | std::swap(a&: X, b&: Y); |
| 2536 | |
| 2537 | // Both operands have narrow versions. Last step: the math must not overflow |
| 2538 | // in the narrow width. |
| 2539 | if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext)) |
| 2540 | return nullptr; |
| 2541 | |
| 2542 | // bo (ext X), (ext Y) --> ext (bo X, Y) |
| 2543 | // bo (ext X), C --> ext (bo X, C') |
| 2544 | Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow" ); |
| 2545 | if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) { |
| 2546 | if (IsSext) |
| 2547 | NewBinOp->setHasNoSignedWrap(); |
| 2548 | else |
| 2549 | NewBinOp->setHasNoUnsignedWrap(); |
| 2550 | } |
| 2551 | return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType()); |
| 2552 | } |
| 2553 | |
| 2554 | /// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) |
| 2555 | /// transform. |
| 2556 | static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, |
| 2557 | GEPOperator &GEP2) { |
| 2558 | return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags()); |
| 2559 | } |
| 2560 | |
| 2561 | /// Thread a GEP operation with constant indices through the constant true/false |
| 2562 | /// arms of a select. |
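|  | /// For example (illustrative):
|  | ///   %p = select i1 %c, ptr @a, ptr @b
|  | ///   %g = getelementptr i8, ptr %p, i64 4
|  | /// becomes:
|  | ///   %g = select i1 %c, ptr getelementptr (i8, ptr @a, i64 4),
|  | ///                      ptr getelementptr (i8, ptr @b, i64 4)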
| 2563 | static Instruction *foldSelectGEP(GetElementPtrInst &GEP, |
| 2564 | InstCombiner::BuilderTy &Builder) { |
| 2565 | if (!GEP.hasAllConstantIndices()) |
| 2566 | return nullptr; |
| 2567 | |
| 2568 | Instruction *Sel; |
| 2569 | Value *Cond; |
| 2570 | Constant *TrueC, *FalseC; |
| 2571 | if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) || |
| 2572 | !match(V: Sel, |
| 2573 | P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC)))) |
| 2574 | return nullptr; |
| 2575 | |
| 2576 | // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' |
| 2577 | // Propagate 'inbounds' and metadata from existing instructions. |
| 2578 | // Note: using IRBuilder to create the constants for efficiency. |
| 2579 | SmallVector<Value *, 4> IndexC(GEP.indices()); |
| 2580 | GEPNoWrapFlags NW = GEP.getNoWrapFlags(); |
| 2581 | Type *Ty = GEP.getSourceElementType(); |
| 2582 | Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "" , NW); |
| 2583 | Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "" , NW); |
| 2584 | return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "" , InsertBefore: nullptr, MDFrom: Sel); |
| 2585 | } |
| 2586 | |
| 2587 | // Canonicalization: |
| 2588 | // gep T, (gep i8, base, C1), (Index + C2) into |
| 2589 | // gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index |
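|  | // For example, with T = i32, C1 = 4, C2 = 3 (illustrative values):
|  | //   %p = getelementptr i8, ptr %base, i64 4
|  | //   %i = add i64 %index, 3
|  | //   %g = getelementptr i32, ptr %p, i64 %i
|  | // becomes
|  | //   %p2 = getelementptr i8, ptr %base, i64 16   ; 4 + 3 * sizeof(i32)
|  | //   %g  = getelementptr i32, ptr %p2, i64 %index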
| 2590 | static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, |
| 2591 | GEPOperator *Src, |
| 2592 | InstCombinerImpl &IC) { |
| 2593 | if (GEP.getNumIndices() != 1) |
| 2594 | return nullptr; |
| 2595 | auto &DL = IC.getDataLayout(); |
| 2596 | Value *Base; |
| 2597 | const APInt *C1; |
| 2598 | if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1)))) |
| 2599 | return nullptr; |
| 2600 | Value *VarIndex; |
| 2601 | const APInt *C2; |
| 2602 | Type *PtrTy = Src->getType()->getScalarType(); |
| 2603 | unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
| 2604 | if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2)))) |
| 2605 | return nullptr; |
| 2606 | if (C1->getBitWidth() != IndexSizeInBits || |
| 2607 | C2->getBitWidth() != IndexSizeInBits) |
| 2608 | return nullptr; |
| 2609 | Type *BaseType = GEP.getSourceElementType(); |
| 2610 | if (isa<ScalableVectorType>(Val: BaseType)) |
| 2611 | return nullptr; |
| 2612 | APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType)); |
| 2613 | APInt NewOffset = TypeSize * *C2 + *C1; |
| 2614 | if (NewOffset.isZero() || |
| 2615 | (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) { |
| 2616 | Value *GEPConst = |
| 2617 | IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset)); |
| 2618 | return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex); |
| 2619 | } |
| 2620 | |
| 2621 | return nullptr; |
| 2622 | } |
| 2623 | |
| 2624 | Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, |
| 2625 | GEPOperator *Src) { |
| 2626 | // Combine Indices - If the source pointer to this getelementptr instruction |
| 2627 | // is a getelementptr instruction with matching element type, combine the |
| 2628 | // indices of the two getelementptr instructions into a single instruction. |
| 2629 | if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src)) |
| 2630 | return nullptr; |
| 2631 | |
| 2632 | if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this)) |
| 2633 | return I; |
| 2634 | |
| 2635 | // For constant GEPs, use a more general offset-based folding approach. |
| 2636 | Type *PtrTy = Src->getType()->getScalarType(); |
| 2637 | if (GEP.hasAllConstantIndices() && |
| 2638 | (Src->hasOneUse() || Src->hasAllConstantIndices())) { |
| 2639 | // Split Src into a variable part and a constant suffix. |
| 2640 | gep_type_iterator GTI = gep_type_begin(GEP: *Src); |
| 2641 | Type *BaseType = GTI.getIndexedType(); |
| 2642 | bool IsFirstType = true; |
| 2643 | unsigned NumVarIndices = 0; |
| 2644 | for (auto Pair : enumerate(First: Src->indices())) { |
| 2645 | if (!isa<ConstantInt>(Val: Pair.value())) { |
| 2646 | BaseType = GTI.getIndexedType(); |
| 2647 | IsFirstType = false; |
| 2648 | NumVarIndices = Pair.index() + 1; |
| 2649 | } |
| 2650 | ++GTI; |
| 2651 | } |
| 2652 | |
| 2653 | // Determine the offset for the constant suffix of Src. |
| 2654 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0); |
| 2655 | if (NumVarIndices != Src->getNumIndices()) { |
| 2656 | // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
| 2657 | if (BaseType->isScalableTy()) |
| 2658 | return nullptr; |
| 2659 | |
| 2660 | SmallVector<Value *> ConstantIndices; |
| 2661 | if (!IsFirstType) |
| 2662 | ConstantIndices.push_back( |
| 2663 | Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext()))); |
| 2664 | append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices)); |
| 2665 | Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices); |
| 2666 | } |
| 2667 | |
| 2668 | // Add the offset for GEP (which is fully constant). |
| 2669 | if (!GEP.accumulateConstantOffset(DL, Offset)) |
| 2670 | return nullptr; |
| 2671 | |
| 2672 | // Convert the total offset back into indices. |
| 2673 | SmallVector<APInt> ConstIndices = |
| 2674 | DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset); |
| 2675 | if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) |
| 2676 | return nullptr; |
| 2677 | |
| 2678 | GEPNoWrapFlags NW = getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)); |
| 2679 | SmallVector<Value *> Indices( |
| 2680 | drop_end(RangeOrContainer: Src->indices(), N: Src->getNumIndices() - NumVarIndices)); |
| 2681 | for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) { |
| 2682 | Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx)); |
| 2683 | // Even if the total offset is inbounds, we may end up representing it |
| 2684 | // by first performing a larger negative offset, and then a smaller |
| 2685 | // positive one. The large negative offset might go out of bounds. Only |
| 2686 | // preserve inbounds if all signs are the same. |
| 2687 | if (Idx.isNonNegative() != ConstIndices[0].isNonNegative()) |
| 2688 | NW = NW.withoutNoUnsignedSignedWrap(); |
| 2689 | if (!Idx.isNonNegative()) |
| 2690 | NW = NW.withoutNoUnsignedWrap(); |
| 2691 | } |
| 2692 | |
| 2693 | return replaceInstUsesWith( |
| 2694 | I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), |
| 2695 | IdxList: Indices, Name: "" , NW)); |
| 2696 | } |
| 2697 | |
| 2698 | if (Src->getResultElementType() != GEP.getSourceElementType()) |
| 2699 | return nullptr; |
| 2700 | |
| 2701 | SmallVector<Value*, 8> Indices; |
| 2702 | |
| 2703 | // Find out whether the last index in the source GEP is a sequential idx. |
| 2704 | bool EndsWithSequential = false; |
| 2705 | for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src); |
| 2706 | I != E; ++I) |
| 2707 | EndsWithSequential = I.isSequential(); |
| 2708 | |
| 2709 | // Can we combine the two pointer arithmetics offsets? |
| 2710 | if (EndsWithSequential) { |
| 2711 | // Replace: gep (gep %P, long B), long A, ... |
| 2712 | // With: T = long A+B; gep %P, T, ... |
| 2713 | Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1); |
| 2714 | Value *GO1 = GEP.getOperand(i_nocapture: 1); |
| 2715 | |
| 2716 | // If they aren't the same type, then the input hasn't been processed |
| 2717 | // by the loop above yet (which canonicalizes sequential index types to |
| 2718 | // intptr_t). Just avoid transforming this until the input has been |
| 2719 | // normalized. |
| 2720 | if (SO1->getType() != GO1->getType()) |
| 2721 | return nullptr; |
| 2722 | |
| 2723 | Value *Sum = |
| 2724 | simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP)); |
| 2725 | // Only do the combine when we are sure the cost after the |
| 2726 | // merge is never more than that before the merge. |
| 2727 | if (Sum == nullptr) |
| 2728 | return nullptr; |
| 2729 | |
| 2730 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1); |
| 2731 | Indices.push_back(Elt: Sum); |
| 2732 | Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end()); |
| 2733 | } else if (isa<Constant>(Val: *GEP.idx_begin()) && |
| 2734 | cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() && |
| 2735 | Src->getNumOperands() != 1) { |
| 2736 | // Otherwise we can do the fold if the first index of the GEP is a zero |
| 2737 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()); |
| 2738 | Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end()); |
| 2739 | } |
| 2740 | |
| 2741 | if (!Indices.empty()) |
| 2742 | return replaceInstUsesWith( |
| 2743 | I&: GEP, V: Builder.CreateGEP( |
| 2744 | Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "" , |
| 2745 | NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
| 2746 | |
| 2747 | return nullptr; |
| 2748 | } |
| 2749 | |
| 2750 | Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, |
| 2751 | BuilderTy *Builder, |
| 2752 | bool &DoesConsume, unsigned Depth) { |
| 2753 | static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); |
| 2754 | // ~(~(X)) -> X. |
| 2755 | Value *A, *B; |
| 2756 | if (match(V, P: m_Not(V: m_Value(V&: A)))) { |
| 2757 | DoesConsume = true; |
| 2758 | return A; |
| 2759 | } |
| 2760 | |
| 2761 | Constant *C; |
| 2762 | // Constants can be considered to be not'ed values. |
| 2763 | if (match(V, P: m_ImmConstant(C))) |
| 2764 | return ConstantExpr::getNot(C); |
| 2765 | |
| 2766 | if (Depth++ >= MaxAnalysisRecursionDepth) |
| 2767 | return nullptr; |
| 2768 | |
| 2769 | // The rest of the cases require that we invert all uses so don't bother |
| 2770 | // doing the analysis if we know we can't use the result. |
| 2771 | if (!WillInvertAllUses) |
| 2772 | return nullptr; |
| 2773 | |
| 2774 | // Compares can be inverted if all of their uses are being modified to use |
| 2775 | // the ~V. |
| 2776 | if (auto *I = dyn_cast<CmpInst>(Val: V)) { |
| 2777 | if (Builder != nullptr) |
| 2778 | return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0), |
| 2779 | RHS: I->getOperand(i_nocapture: 1)); |
| 2780 | return NonNull; |
| 2781 | } |
| 2782 | |
| 2783 | // If `V` is of the form `A + B` then `-1 - V` can be folded into |
| 2784 | // `(-1 - B) - A` if we are willing to invert all of the uses. |
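|  | // (~(A + B) == -1 - (A + B) == (-1 - B) - A, so a free inversion of either
|  | // operand turns the add into a single sub.)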
| 2785 | if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 2786 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
| 2787 | DoesConsume, Depth)) |
| 2788 | return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull; |
| 2789 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2790 | DoesConsume, Depth)) |
| 2791 | return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull; |
| 2792 | return nullptr; |
| 2793 | } |
| 2794 | |
| 2795 | // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded |
| 2796 | // into `A ^ B` if we are willing to invert all of the uses. |
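|  | // (Not distributes into xor: ~(A ^ B) == A ^ ~B == ~A ^ B, so the outer not
|  | // folds into whichever operand is freely invertible.)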
| 2797 | if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 2798 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
| 2799 | DoesConsume, Depth)) |
| 2800 | return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull; |
| 2801 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2802 | DoesConsume, Depth)) |
| 2803 | return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull; |
| 2804 | return nullptr; |
| 2805 | } |
| 2806 | |
| 2807 | // If `V` is of the form `B - A` then `-1 - V` can be folded into |
| 2808 | // `A + (-1 - B)` if we are willing to invert all of the uses. |
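|  | // (~(B - A) == -1 - (B - A) == A + (-1 - B).)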
| 2809 | if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 2810 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2811 | DoesConsume, Depth)) |
| 2812 | return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull; |
| 2813 | return nullptr; |
| 2814 | } |
| 2815 | |
| 2816 | // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded |
| 2817 | // into `A s>> B` if we are willing to invert all of the uses. |
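|  | // (Bitwise not commutes with arithmetic shift right: ~(X s>> B) == (~X) s>> B,
|  | // because the sign bit is inverted before being replicated.)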
| 2818 | if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 2819 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2820 | DoesConsume, Depth)) |
| 2821 | return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull; |
| 2822 | return nullptr; |
| 2823 | } |
| 2824 | |
| 2825 | Value *Cond; |
| 2826 | // LogicOps are special in that we canonicalize them at the cost of an |
| 2827 | // instruction. |
| 2828 | bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) && |
| 2829 | !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V)); |
| 2830 | // Selects/min/max with invertible operands are freely invertible |
| 2831 | if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
| 2832 | bool LocalDoesConsume = DoesConsume; |
| 2833 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr, |
| 2834 | DoesConsume&: LocalDoesConsume, Depth)) |
| 2835 | return nullptr; |
| 2836 | if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2837 | DoesConsume&: LocalDoesConsume, Depth)) { |
| 2838 | DoesConsume = LocalDoesConsume; |
| 2839 | if (Builder != nullptr) { |
| 2840 | Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
| 2841 | DoesConsume, Depth); |
| 2842 | assert(NotB != nullptr && |
| 2843 | "Unable to build inverted value for known freely invertable op" ); |
| 2844 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
| 2845 | return Builder->CreateBinaryIntrinsic( |
| 2846 | ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB); |
| 2847 | return Builder->CreateSelect(C: Cond, True: NotA, False: NotB); |
| 2848 | } |
| 2849 | return NonNull; |
| 2850 | } |
| 2851 | } |
| 2852 | |
| 2853 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
| 2854 | bool LocalDoesConsume = DoesConsume; |
| 2855 | SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues; |
| 2856 | for (Use &U : PN->operands()) { |
| 2857 | BasicBlock *IncomingBlock = PN->getIncomingBlock(U); |
| 2858 | Value *NewIncomingVal = getFreelyInvertedImpl( |
| 2859 | V: U.get(), /*WillInvertAllUses=*/false, |
| 2860 | /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1); |
| 2861 | if (NewIncomingVal == nullptr) |
| 2862 | return nullptr; |
| 2863 | // Make sure that we can safely erase the original PHI node. |
| 2864 | if (NewIncomingVal == V) |
| 2865 | return nullptr; |
| 2866 | if (Builder != nullptr) |
| 2867 | IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock); |
| 2868 | } |
| 2869 | |
| 2870 | DoesConsume = LocalDoesConsume; |
| 2871 | if (Builder != nullptr) { |
| 2872 | IRBuilderBase::InsertPointGuard Guard(*Builder); |
| 2873 | Builder->SetInsertPoint(PN); |
| 2874 | PHINode *NewPN = |
| 2875 | Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues()); |
| 2876 | for (auto [Val, Pred] : IncomingValues) |
| 2877 | NewPN->addIncoming(V: Val, BB: Pred); |
| 2878 | return NewPN; |
| 2879 | } |
| 2880 | return NonNull; |
| 2881 | } |
| 2882 | |
| 2883 | if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) { |
| 2884 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2885 | DoesConsume, Depth)) |
| 2886 | return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull; |
| 2887 | return nullptr; |
| 2888 | } |
| 2889 | |
| 2890 | if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) { |
| 2891 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2892 | DoesConsume, Depth)) |
| 2893 | return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull; |
| 2894 | return nullptr; |
| 2895 | } |
| 2896 | |
| 2897 | // De Morgan's Laws: |
| 2898 | // (~(A | B)) -> (~A & ~B) |
| 2899 | // (~(A & B)) -> (~A | ~B) |
| 2900 | auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode, |
| 2901 | bool IsLogical, Value *A, |
| 2902 | Value *B) -> Value * { |
| 2903 | bool LocalDoesConsume = DoesConsume; |
| 2904 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr, |
| 2905 | DoesConsume&: LocalDoesConsume, Depth)) |
| 2906 | return nullptr; |
| 2907 | if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
| 2908 | DoesConsume&: LocalDoesConsume, Depth)) { |
| 2909 | auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
| 2910 | DoesConsume&: LocalDoesConsume, Depth); |
| 2911 | DoesConsume = LocalDoesConsume; |
| 2912 | if (IsLogical) |
| 2913 | return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull; |
| 2914 | return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull; |
| 2915 | } |
| 2916 | |
| 2917 | return nullptr; |
| 2918 | }; |
| 2919 | |
| 2920 | if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) |
| 2921 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A, |
| 2922 | B); |
| 2923 | |
| 2924 | if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B)))) |
| 2925 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A, |
| 2926 | B); |
| 2927 | |
| 2928 | if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B)))) |
| 2929 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A, |
| 2930 | B); |
| 2931 | |
| 2932 | if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) |
| 2933 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A, |
| 2934 | B); |
| 2935 | |
| 2936 | return nullptr; |
| 2937 | } |
| 2938 | |
| 2939 | /// Return true if we should canonicalize the gep to an i8 ptradd. |
| 2940 | static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) { |
| 2941 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
| 2942 | Type *GEPEltType = GEP.getSourceElementType(); |
| 2943 | if (GEPEltType->isIntegerTy(Bitwidth: 8)) |
| 2944 | return false; |
| 2945 | |
| 2946 | // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale |
| 2947 | // intrinsic. This has better support in BasicAA. |
| 2948 | if (GEPEltType->isScalableTy()) |
| 2949 | return true; |
| 2950 | |
| 2951 | // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies |
| 2952 | // together. |
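|  | // For example (illustrative):
|  | //   %o = mul i64 %n, 3
|  | //   %g = getelementptr i32, ptr %p, i64 %o
|  | // is canonicalized to
|  | //   %o2 = mul i64 %n, 12
|  | //   %g  = getelementptr i8, ptr %p, i64 %o2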
| 2953 | if (GEP.getNumIndices() == 1 && |
| 2954 | match(V: GEP.getOperand(i_nocapture: 1), |
| 2955 | P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()), |
| 2956 | R: m_Shl(L: m_Value(), R: m_ConstantInt()))))) |
| 2957 | return true; |
| 2958 | |
| 2959 | // gep (gep %p, C1), %x, C2 is expanded so the two constants can |
| 2960 | // possibly be merged together. |
| 2961 | auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp); |
| 2962 | return PtrOpGep && PtrOpGep->hasAllConstantIndices() && |
| 2963 | any_of(Range: GEP.indices(), P: [](Value *V) { |
| 2964 | const APInt *C; |
| 2965 | return match(V, P: m_APInt(Res&: C)) && !C->isZero(); |
| 2966 | }); |
| 2967 | } |
| 2968 | |
| 2969 | static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, |
| 2970 | IRBuilderBase &Builder) { |
| 2971 | auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0)); |
| 2972 | if (!Op1) |
| 2973 | return nullptr; |
| 2974 | |
| 2975 | // Don't fold a GEP into itself through a PHI node. This can only happen |
| 2976 | // through the back-edge of a loop. Folding a GEP into itself means that |
| 2977 | // the value of the previous iteration needs to be stored in the meantime, |
| 2978 | // thus requiring an additional register variable to be live, but not |
| 2979 | // actually achieving anything (the GEP still needs to be executed once per |
| 2980 | // loop iteration). |
| 2981 | if (Op1 == &GEP) |
| 2982 | return nullptr; |
| 2983 | GEPNoWrapFlags NW = Op1->getNoWrapFlags(); |
| 2984 | |
| 2985 | int DI = -1; |
| 2986 | |
| 2987 | for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { |
| 2988 | auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I); |
| 2989 | if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || |
| 2990 | Op1->getSourceElementType() != Op2->getSourceElementType()) |
| 2991 | return nullptr; |
| 2992 | |
| 2993 | // As for Op1 above, don't try to fold a GEP into itself. |
| 2994 | if (Op2 == &GEP) |
| 2995 | return nullptr; |
| 2996 | |
| 2997 | // Keep track of the type as we walk the GEP. |
| 2998 | Type *CurTy = nullptr; |
| 2999 | |
| 3000 | for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { |
| 3001 | if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType()) |
| 3002 | return nullptr; |
| 3003 | |
| 3004 | if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) { |
| 3005 | if (DI == -1) { |
| 3006 | // We have not yet seen any differences in the GEPs feeding the
| 3007 | // PHI, so we record this one if it is allowed to be a
| 3008 | // variable.
| 3009 | |
| 3010 | // The first two arguments can vary for any GEP; the rest have to be
| 3011 | // static for struct slots.
| 3012 | if (J > 1) { |
| 3013 | assert(CurTy && "No current type?" ); |
| 3014 | if (CurTy->isStructTy()) |
| 3015 | return nullptr; |
| 3016 | } |
| 3017 | |
| 3018 | DI = J; |
| 3019 | } else { |
| 3020 | // The GEPs differ in more than one operand. While this could be
| 3021 | // extended to support GEPs that vary by more than one variable, it
| 3022 | // doesn't make sense, since it greatly increases the complexity and
| 3023 | // would result in an R+R+R addressing mode, which no backend
| 3024 | // directly supports and which would need to be broken into several
| 3025 | // simpler instructions anyway.
| 3026 | return nullptr; |
| 3027 | } |
| 3028 | } |
| 3029 | |
| 3030 | // Sink down a layer of the type for the next iteration. |
| 3031 | if (J > 0) { |
| 3032 | if (J == 1) { |
| 3033 | CurTy = Op1->getSourceElementType(); |
| 3034 | } else { |
| 3035 | CurTy = |
| 3036 | GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J)); |
| 3037 | } |
| 3038 | } |
| 3039 | } |
| 3040 | |
| 3041 | NW &= Op2->getNoWrapFlags(); |
| 3042 | } |
| 3043 | |
| 3044 | // If not all GEPs are identical we'll have to create a new PHI node. |
| 3045 | // Check that the old PHI node has only one use so that it will get |
| 3046 | // removed. |
| 3047 | if (DI != -1 && !PN->hasOneUse()) |
| 3048 | return nullptr; |
| 3049 | |
| 3050 | auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone()); |
| 3051 | NewGEP->setNoWrapFlags(NW); |
| 3052 | |
| 3053 | if (DI == -1) { |
| 3054 | // All the GEPs feeding the PHI are identical. Clone one down into our |
| 3055 | // BB so that it can be merged with the current GEP. |
| 3056 | } else { |
| 3057 | // All the GEPs feeding the PHI differ at a single offset. Clone a GEP |
| 3058 | // into the current block so it can be merged, and create a new PHI to |
| 3059 | // set that index. |
| 3060 | PHINode *NewPN; |
| 3061 | { |
| 3062 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3063 | Builder.SetInsertPoint(PN); |
| 3064 | NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(), |
| 3065 | NumReservedValues: PN->getNumOperands()); |
| 3066 | } |
| 3067 | |
| 3068 | for (auto &I : PN->operands()) |
| 3069 | NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI), |
| 3070 | BB: PN->getIncomingBlock(U: I)); |
| 3071 | |
| 3072 | NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN); |
| 3073 | } |
| 3074 | |
| 3075 | NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt()); |
| 3076 | return NewGEP; |
| 3077 | } |
| 3078 | |
| 3079 | Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { |
| 3080 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
| 3081 | SmallVector<Value *, 8> Indices(GEP.indices()); |
| 3082 | Type *GEPType = GEP.getType(); |
| 3083 | Type *GEPEltType = GEP.getSourceElementType(); |
| 3084 | if (Value *V = |
| 3085 | simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(), |
| 3086 | Q: SQ.getWithInstruction(I: &GEP))) |
| 3087 | return replaceInstUsesWith(I&: GEP, V); |
| 3088 | |
| 3089 | // For vector geps, use the generic demanded vector support. |
| 3090 | // Skip if GEP return type is scalable. The number of elements is unknown at |
| 3091 | // compile-time. |
| 3092 | if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) { |
| 3093 | auto VWidth = GEPFVTy->getNumElements(); |
| 3094 | APInt PoisonElts(VWidth, 0); |
| 3095 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
| 3096 | if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask, |
| 3097 | PoisonElts)) { |
| 3098 | if (V != &GEP) |
| 3099 | return replaceInstUsesWith(I&: GEP, V); |
| 3100 | return &GEP; |
| 3101 | } |
| 3102 | } |
| 3103 | |
| 3104 | // Eliminate unneeded casts for indices, and replace indices which displace |
| 3105 | // by multiples of a zero size type with zero. |
| 3106 | bool MadeChange = false; |
| 3107 | |
| 3108 | // Index width may not be the same width as pointer width. |
| 3109 | // Data layout chooses the right type based on supported integer types. |
| 3110 | Type *NewScalarIndexTy = |
| 3111 | DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType()); |
| 3112 | |
| 3113 | gep_type_iterator GTI = gep_type_begin(GEP); |
| 3114 | for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; |
| 3115 | ++I, ++GTI) { |
| 3116 | // Skip indices into struct types. |
| 3117 | if (GTI.isStruct()) |
| 3118 | continue; |
| 3119 | |
| 3120 | Type *IndexTy = (*I)->getType(); |
| 3121 | Type *NewIndexType = |
| 3122 | IndexTy->isVectorTy() |
| 3123 | ? VectorType::get(ElementType: NewScalarIndexTy, |
| 3124 | EC: cast<VectorType>(Val: IndexTy)->getElementCount()) |
| 3125 | : NewScalarIndexTy; |
| 3126 | |
| 3127 | // If the element type has zero size then any index over it is equivalent |
| 3128 | // to an index of zero, so replace it with zero if it is not zero already. |
| 3129 | Type *EltTy = GTI.getIndexedType(); |
| 3130 | if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero()) |
| 3131 | if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) { |
| 3132 | *I = Constant::getNullValue(Ty: NewIndexType); |
| 3133 | MadeChange = true; |
| 3134 | } |
| 3135 | |
| 3136 | if (IndexTy != NewIndexType) { |
| 3137 | // If we are using a wider index than needed for this platform, shrink |
| 3138 | // it to what we need. If narrower, sign-extend it to what we need. |
| 3139 | // This explicit cast can make subsequent optimizations more obvious. |
| 3140 | *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true); |
| 3141 | MadeChange = true; |
| 3142 | } |
| 3143 | } |
| 3144 | if (MadeChange) |
| 3145 | return &GEP; |
| 3146 | |
| 3147 | // Canonicalize constant GEPs to i8 type. |
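|  | // For example (illustrative):
|  | //   %g = getelementptr i32, ptr %p, i64 3
|  | // becomes
|  | //   %g = getelementptr i8, ptr %p, i64 12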
| 3148 | if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) { |
| 3149 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0); |
| 3150 | if (GEP.accumulateConstantOffset(DL, Offset)) |
| 3151 | return replaceInstUsesWith( |
| 3152 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "" , |
| 3153 | NW: GEP.getNoWrapFlags())); |
| 3154 | } |
| 3155 | |
| 3156 | if (shouldCanonicalizeGEPToPtrAdd(GEP)) { |
| 3157 | Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP)); |
| 3158 | Value *NewGEP = |
| 3159 | Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "" , NW: GEP.getNoWrapFlags()); |
| 3160 | return replaceInstUsesWith(I&: GEP, V: NewGEP); |
| 3161 | } |
| 3162 | |
| 3163 | // Scalarize vector operands; prefer splat-of-gep as the canonical form.
| 3164 | // Note that this loses information about undef lanes; we run it after
| 3165 | // demanded bits to partially mitigate that loss. |
| 3166 | if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) { |
| 3167 | return Op->getType()->isVectorTy() && getSplatValue(V: Op); |
| 3168 | })) { |
| 3169 | SmallVector<Value *> NewOps; |
| 3170 | for (auto &Op : GEP.operands()) { |
| 3171 | if (Op->getType()->isVectorTy()) |
| 3172 | if (Value *Scalar = getSplatValue(V: Op)) { |
| 3173 | NewOps.push_back(Elt: Scalar); |
| 3174 | continue; |
| 3175 | } |
| 3176 | NewOps.push_back(Elt: Op); |
| 3177 | } |
| 3178 | |
| 3179 | Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0], |
| 3180 | IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(), |
| 3181 | NW: GEP.getNoWrapFlags()); |
| 3182 | if (!Res->getType()->isVectorTy()) { |
| 3183 | ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount(); |
| 3184 | Res = Builder.CreateVectorSplat(EC, V: Res); |
| 3185 | } |
| 3186 | return replaceInstUsesWith(I&: GEP, V: Res); |
| 3187 | } |
| 3188 | |
| 3189 | // Check to see if the inputs to the PHI node are getelementptr instructions. |
| 3190 | if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) { |
| 3191 | if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder)) |
| 3192 | return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp); |
| 3193 | } |
| 3194 | |
| 3195 | if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp)) |
| 3196 | if (Instruction *I = visitGEPOfGEP(GEP, Src)) |
| 3197 | return I; |
| 3198 | |
| 3199 | if (GEP.getNumIndices() == 1) { |
| 3200 | unsigned AS = GEP.getPointerAddressSpace(); |
| 3201 | if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() == |
| 3202 | DL.getIndexSizeInBits(AS)) { |
| 3203 | uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue(); |
| 3204 | |
| 3205 | if (TyAllocSize == 1) { |
| 3206 | // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), |
| 3207 | // but only if the result pointer is only used as if it were an integer, |
| 3208 | // or both point to the same underlying object (otherwise provenance is |
| 3209 | // not necessarily retained). |
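|  | // For example (illustrative):
|  | //   %xi = ptrtoint ptr %x to i64
|  | //   %yi = ptrtoint ptr %y to i64
|  | //   %d  = sub i64 %yi, %xi
|  | //   %g  = getelementptr i8, ptr %x, i64 %d
|  | // Here %g computes the address of %y, so qualifying uses of %g are replaced
|  | // with %y directly.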
| 3210 | Value *X = GEP.getPointerOperand(); |
| 3211 | Value *Y; |
| 3212 | if (match(V: GEP.getOperand(i_nocapture: 1), |
| 3213 | P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) && |
| 3214 | GEPType == Y->getType()) { |
| 3215 | bool HasSameUnderlyingObject = |
| 3216 | getUnderlyingObject(V: X) == getUnderlyingObject(V: Y); |
| 3217 | bool Changed = false; |
| 3218 | GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) { |
| 3219 | bool ShouldReplace = HasSameUnderlyingObject || |
| 3220 | isa<ICmpInst>(Val: U.getUser()) || |
| 3221 | isa<PtrToIntInst>(Val: U.getUser()); |
| 3222 | Changed |= ShouldReplace; |
| 3223 | return ShouldReplace; |
| 3224 | }); |
| 3225 | return Changed ? &GEP : nullptr; |
| 3226 | } |
| 3227 | } else if (auto *ExactIns = |
| 3228 | dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) { |
| 3229 | // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) |
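|  | // For example (illustrative):
|  | //   %i = lshr exact i64 %v, 2
|  | //   %g = getelementptr i32, ptr %x, i64 %i
|  | // becomes
|  | //   %g = getelementptr i8, ptr %x, i64 %v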
| 3230 | Value *V; |
| 3231 | if (ExactIns->isExact()) { |
| 3232 | if ((has_single_bit(Value: TyAllocSize) && |
| 3233 | match(V: GEP.getOperand(i_nocapture: 1), |
| 3234 | P: m_Shr(L: m_Value(V), |
| 3235 | R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) || |
| 3236 | match(V: GEP.getOperand(i_nocapture: 1), |
| 3237 | P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) { |
| 3238 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
| 3239 | Ptr: GEP.getPointerOperand(), IdxList: V, |
| 3240 | NW: GEP.getNoWrapFlags()); |
| 3241 | } |
| 3242 | } |
| 3243 | if (ExactIns->isExact() && ExactIns->hasOneUse()) { |
| 3244 | // Try to canonicalize a non-i8 element type to i8 if the index is an
| 3245 | // exact instruction (div/shr) with a constant RHS: we can fold the
| 3246 | // non-i8 element scale into the div/shr (similar to the mul case, just
| 3247 | // inverted).
| 3248 | const APInt *C; |
| 3249 | std::optional<APInt> NewC; |
| 3250 | if (has_single_bit(Value: TyAllocSize) && |
| 3251 | match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) && |
| 3252 | C->uge(RHS: countr_zero(Val: TyAllocSize))) |
| 3253 | NewC = *C - countr_zero(Val: TyAllocSize); |
| 3254 | else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
| 3255 | APInt Quot; |
| 3256 | uint64_t Rem; |
| 3257 | APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
| 3258 | if (Rem == 0) |
| 3259 | NewC = Quot; |
| 3260 | } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
| 3261 | APInt Quot; |
| 3262 | int64_t Rem; |
| 3263 | APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
| 3264 | // For sdiv we need to make sure we aren't creating INT_MIN / -1.
| 3265 | if (!Quot.isAllOnes() && Rem == 0) |
| 3266 | NewC = Quot; |
| 3267 | } |
| 3268 | |
| 3269 | if (NewC.has_value()) { |
| 3270 | Value *NewOp = Builder.CreateBinOp( |
| 3271 | Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V, |
| 3272 | RHS: ConstantInt::get(Ty: V->getType(), V: *NewC)); |
| 3273 | cast<BinaryOperator>(Val: NewOp)->setIsExact(); |
| 3274 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
| 3275 | Ptr: GEP.getPointerOperand(), IdxList: NewOp, |
| 3276 | NW: GEP.getNoWrapFlags()); |
| 3277 | } |
| 3278 | } |
| 3279 | } |
| 3280 | } |
| 3281 | } |
| 3282 | // We do not handle pointer-vector geps here. |
| 3283 | if (GEPType->isVectorTy()) |
| 3284 | return nullptr; |
| 3285 | |
| 3286 | if (!GEP.isInBounds()) { |
| 3287 | unsigned IdxWidth = |
| 3288 | DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace()); |
| 3289 | APInt BasePtrOffset(IdxWidth, 0); |
| 3290 | Value *UnderlyingPtrOp = |
| 3291 | PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset); |
| 3292 | bool CanBeNull, CanBeFreed; |
| 3293 | uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( |
| 3294 | DL, CanBeNull, CanBeFreed); |
| 3295 | if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { |
| 3296 | if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) && |
| 3297 | BasePtrOffset.isNonNegative()) { |
| 3298 | APInt AllocSize(IdxWidth, DerefBytes); |
| 3299 | if (BasePtrOffset.ule(RHS: AllocSize)) { |
| 3300 | return GetElementPtrInst::CreateInBounds( |
| 3301 | PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName()); |
| 3302 | } |
| 3303 | } |
| 3304 | } |
| 3305 | } |
| 3306 | |
| 3307 | // nusw + nneg -> nuw |
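|  | // For example (illustrative): when %i is known non-negative,
|  | //   %g = getelementptr nusw i32, ptr %p, i64 %i
|  | // can be strengthened to
|  | //   %g = getelementptr nusw nuw i32, ptr %p, i64 %i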
| 3308 | if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() && |
| 3309 | all_of(Range: GEP.indices(), P: [&](Value *Idx) { |
| 3310 | return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP)); |
| 3311 | })) { |
| 3312 | GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap()); |
| 3313 | return &GEP; |
| 3314 | } |
| 3315 | |
| 3316 | // These rewrites are trying to preserve inbounds/nuw attributes. So we want |
| 3317 | // to do this after having tried to derive "nuw" above. |
| 3318 | if (GEP.getNumIndices() == 1) { |
| 3319 | // Given (gep p, x+y) we want to determine the common nowrap flags for both |
| 3320 | // geps if transforming into (gep (gep p, x), y). |
| 3321 | auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) { |
| 3322 | // We can preserve "inbounds nuw", "nusw nuw", and "nuw" if we know
| 3323 | // that x + y does not have unsigned wrap.
| 3324 | if (GEP.hasNoUnsignedWrap() && AddIsNUW) |
| 3325 | return GEP.getNoWrapFlags(); |
| 3326 | return GEPNoWrapFlags::none(); |
| 3327 | }; |
| 3328 | |
| 3329 | // Try to replace ADD + GEP with GEP + GEP. |
| 3330 | Value *Idx1, *Idx2; |
| 3331 | if (match(V: GEP.getOperand(i_nocapture: 1), |
| 3332 | P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) { |
| 3333 | // %idx = add i64 %idx1, %idx2 |
| 3334 | // %gep = getelementptr i32, ptr %ptr, i64 %idx |
| 3335 | // as: |
| 3336 | // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 |
| 3337 | // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 |
| 3338 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value())); |
| 3339 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
| 3340 | auto *NewPtr = |
| 3341 | Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
| 3342 | IdxList: Idx1, Name: "" , NW: NWFlags); |
| 3343 | return replaceInstUsesWith(I&: GEP, |
| 3344 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), |
| 3345 | Ptr: NewPtr, IdxList: Idx2, Name: "" , NW: NWFlags)); |
| 3346 | } |
| 3347 | ConstantInt *C; |
| 3348 | if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike( |
| 3349 | L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) { |
| 3350 | // %add = add nsw i32 %idx1, idx2 |
| 3351 | // %sidx = sext i32 %add to i64 |
| 3352 | // %gep = getelementptr i32, ptr %ptr, i64 %sidx |
| 3353 | // as: |
| 3354 | // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 |
| 3355 | // %newgep = getelementptr i32, ptr %newptr, i32 idx2 |
| 3356 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), |
| 3357 | P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value()))); |
| 3358 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
| 3359 | auto *NewPtr = Builder.CreateGEP( |
| 3360 | Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
| 3361 | IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "" , NW: NWFlags); |
| 3362 | return replaceInstUsesWith( |
| 3363 | I&: GEP, |
| 3364 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, |
| 3365 | IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), |
| 3366 | Name: "" , NW: NWFlags)); |
| 3367 | } |
| 3368 | } |
| 3369 | |
| 3370 | if (Instruction *R = foldSelectGEP(GEP, Builder)) |
| 3371 | return R; |
| 3372 | |
| 3373 | return nullptr; |
| 3374 | } |
| 3375 | |
| 3376 | static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, |
| 3377 | Instruction *AI) { |
| 3378 | if (isa<ConstantPointerNull>(Val: V)) |
| 3379 | return true; |
| 3380 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) |
| 3381 | return isa<GlobalVariable>(Val: LI->getPointerOperand()); |
| 3382 | // Two distinct allocations will never be equal. |
| 3383 | return isAllocLikeFn(V, TLI: &TLI) && V != AI; |
| 3384 | } |
| 3385 | |
| 3386 | /// Given a call CB which uses an address UsedV, return true if we can prove the |
| 3387 | /// call's only possible effect is storing to V. |
| 3388 | static bool isRemovableWrite(CallBase &CB, Value *UsedV, |
| 3389 | const TargetLibraryInfo &TLI) { |
| 3390 | if (!CB.use_empty()) |
| 3391 | // TODO: add recursion if returned attribute is present |
| 3392 | return false; |
| 3393 | |
| 3394 | if (CB.isTerminator()) |
| 3395 | // TODO: remove implementation restriction |
| 3396 | return false; |
| 3397 | |
| 3398 | if (!CB.willReturn() || !CB.doesNotThrow()) |
| 3399 | return false; |
| 3400 | |
| 3401 | // If the only possible side effect of the call is writing to the alloca, |
| 3402 | // and the result isn't used, we can safely remove any reads implied by the |
| 3403 | // call including those which might read the alloca itself. |
| 3404 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI); |
| 3405 | return Dest && Dest->Ptr == UsedV; |
| 3406 | } |
| 3407 | |
| 3408 | static std::optional<ModRefInfo> |
| 3409 | isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users, |
| 3410 | const TargetLibraryInfo &TLI, bool KnowInit) { |
| 3411 | SmallVector<Instruction*, 4> Worklist; |
| 3412 | const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI); |
| 3413 | Worklist.push_back(Elt: AI); |
| 3414 | ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod; |
| 3415 | |
| 3416 | do { |
| 3417 | Instruction *PI = Worklist.pop_back_val(); |
| 3418 | for (User *U : PI->users()) { |
| 3419 | Instruction *I = cast<Instruction>(Val: U); |
| 3420 | switch (I->getOpcode()) { |
| 3421 | default: |
| 3422 | // Give up the moment we see something we can't handle. |
| 3423 | return std::nullopt; |
| 3424 | |
| 3425 | case Instruction::AddrSpaceCast: |
| 3426 | case Instruction::BitCast: |
| 3427 | case Instruction::GetElementPtr: |
| 3428 | Users.emplace_back(Args&: I); |
| 3429 | Worklist.push_back(Elt: I); |
| 3430 | continue; |
| 3431 | |
| 3432 | case Instruction::ICmp: { |
| 3433 | ICmpInst *ICI = cast<ICmpInst>(Val: I); |
| 3434 | // We can fold eq/ne comparisons with null to false/true, respectively. |
| 3435 | // We also fold comparisons in some conditions provided the alloc has |
| 3436 | // not escaped (see isNeverEqualToUnescapedAlloc). |
| 3437 | if (!ICI->isEquality()) |
| 3438 | return std::nullopt; |
| 3439 | unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0; |
| 3440 | if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI)) |
| 3441 | return std::nullopt; |
| 3442 | |
| 3443 | // Do not fold compares to aligned_alloc calls, as they may have to |
| 3444 | // return null in case the required alignment cannot be satisfied, |
| 3445 | // unless we can prove that both alignment and size are valid. |
| 3446 | auto AlignmentAndSizeKnownValid = [](CallBase *CB) { |
| 3447 | // Check whether the alignment and size of a call to aligned_alloc are
| 3448 | // valid, that is, the alignment is a power of 2 and the size is a
| 3449 | // multiple of the alignment.
| 3450 | const APInt *Alignment; |
| 3451 | const APInt *Size; |
| 3452 | return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) && |
| 3453 | match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) && |
| 3454 | Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero(); |
| 3455 | }; |
| 3456 | auto *CB = dyn_cast<CallBase>(Val: AI); |
| 3457 | LibFunc TheLibFunc; |
| 3458 | if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) && |
| 3459 | TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && |
| 3460 | !AlignmentAndSizeKnownValid(CB)) |
| 3461 | return std::nullopt; |
| 3462 | Users.emplace_back(Args&: I); |
| 3463 | continue; |
| 3464 | } |
| 3465 | |
| 3466 | case Instruction::Call: |
| 3467 | // Ignore no-op and store intrinsics. |
| 3468 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
| 3469 | switch (II->getIntrinsicID()) { |
| 3470 | default: |
| 3471 | return std::nullopt; |
| 3472 | |
| 3473 | case Intrinsic::memmove: |
| 3474 | case Intrinsic::memcpy: |
| 3475 | case Intrinsic::memset: { |
| 3476 | MemIntrinsic *MI = cast<MemIntrinsic>(Val: II); |
| 3477 | if (MI->isVolatile()) |
| 3478 | return std::nullopt; |
| 3479 | // Note: this could also be ModRef, but we can still interpret that |
| 3480 | // as just Mod in that case. |
| 3481 | ModRefInfo NewAccess = |
| 3482 | MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref; |
| 3483 | if ((Access & ~NewAccess) != ModRefInfo::NoModRef) |
| 3484 | return std::nullopt; |
| 3485 | Access |= NewAccess; |
| 3486 | [[fallthrough]]; |
| 3487 | } |
| 3488 | case Intrinsic::assume: |
| 3489 | case Intrinsic::invariant_start: |
| 3490 | case Intrinsic::invariant_end: |
| 3491 | case Intrinsic::lifetime_start: |
| 3492 | case Intrinsic::lifetime_end: |
| 3493 | case Intrinsic::objectsize: |
| 3494 | Users.emplace_back(Args&: I); |
| 3495 | continue; |
| 3496 | case Intrinsic::launder_invariant_group: |
| 3497 | case Intrinsic::strip_invariant_group: |
| 3498 | Users.emplace_back(Args&: I); |
| 3499 | Worklist.push_back(Elt: I); |
| 3500 | continue; |
| 3501 | } |
| 3502 | } |
| 3503 | |
| 3504 | if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI && |
| 3505 | getAllocationFamily(I, TLI: &TLI) == Family) { |
| 3506 | Users.emplace_back(Args&: I); |
| 3507 | continue; |
| 3508 | } |
| 3509 | |
| 3510 | if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI && |
| 3511 | getAllocationFamily(I, TLI: &TLI) == Family) { |
| 3512 | Users.emplace_back(Args&: I); |
| 3513 | Worklist.push_back(Elt: I); |
| 3514 | continue; |
| 3515 | } |
| 3516 | |
| 3517 | if (!isRefSet(MRI: Access) && |
| 3518 | isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) { |
| 3519 | Access |= ModRefInfo::Mod; |
| 3520 | Users.emplace_back(Args&: I); |
| 3521 | continue; |
| 3522 | } |
| 3523 | |
| 3524 | return std::nullopt; |
| 3525 | |
| 3526 | case Instruction::Store: { |
| 3527 | StoreInst *SI = cast<StoreInst>(Val: I); |
| 3528 | if (SI->isVolatile() || SI->getPointerOperand() != PI) |
| 3529 | return std::nullopt; |
| 3530 | if (isRefSet(MRI: Access)) |
| 3531 | return std::nullopt; |
| 3532 | Access |= ModRefInfo::Mod; |
| 3533 | Users.emplace_back(Args&: I); |
| 3534 | continue; |
| 3535 | } |
| 3536 | |
| 3537 | case Instruction::Load: { |
| 3538 | LoadInst *LI = cast<LoadInst>(Val: I); |
| 3539 | if (LI->isVolatile() || LI->getPointerOperand() != PI) |
| 3540 | return std::nullopt; |
| 3541 | if (isModSet(MRI: Access)) |
| 3542 | return std::nullopt; |
| 3543 | Access |= ModRefInfo::Ref; |
| 3544 | Users.emplace_back(Args&: I); |
| 3545 | continue; |
| 3546 | } |
| 3547 | } |
| 3548 | llvm_unreachable("missing a return?" ); |
| 3549 | } |
| 3550 | } while (!Worklist.empty()); |
| 3551 | |
| 3552 | assert(Access != ModRefInfo::ModRef); |
| 3553 | return Access; |
| 3554 | } |
| 3555 | |
| 3556 | Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { |
| 3557 | assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); |
| 3558 | |
| 3559 | // If we have a malloc call which is only used in any number of comparisons
| 3560 | // to null and in free calls, delete the calls and replace the comparisons
| 3561 | // with true or false as appropriate.
| 3562 | |
| 3563 | // This is based on the principle that we can substitute our own allocation
| 3564 | // function (which will never return null), rather than on knowledge of the
| 3565 | // specific function being called. In some sense this can change the permitted
| 3566 | // outputs of a program (when we convert a malloc to an alloca, the fact that
| 3567 | // the allocation is now on the stack is potentially visible, for example),
| 3568 | // but we believe it does so in a permissible manner.
| 3569 | SmallVector<WeakTrackingVH, 64> Users; |
| 3570 | |
| 3571 | // If we are removing an alloca with a dbg.declare, insert dbg.value calls |
| 3572 | // before each store. |
| 3573 | SmallVector<DbgVariableIntrinsic *, 8> DVIs; |
| 3574 | SmallVector<DbgVariableRecord *, 8> DVRs; |
| 3575 | std::unique_ptr<DIBuilder> DIB; |
| 3576 | if (isa<AllocaInst>(Val: MI)) { |
| 3577 | findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs); |
| 3578 | DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); |
| 3579 | } |
| 3580 | |
| 3581 | // Determine what getInitialValueOfAllocation would return without actually |
| 3582 | // allocating the result. |
| 3583 | bool KnowInitUndef = false; |
| 3584 | bool KnowInitZero = false; |
| 3585 | Constant *Init = |
| 3586 | getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext())); |
| 3587 | if (Init) { |
| 3588 | if (isa<UndefValue>(Val: Init)) |
| 3589 | KnowInitUndef = true; |
| 3590 | else if (Init->isNullValue()) |
| 3591 | KnowInitZero = true; |
| 3592 | } |
| 3593 | // The various sanitizers don't actually return undef memory, but rather
| 3594 | // memory initialized with special forms of runtime poison.
| 3595 | auto &F = *MI.getFunction(); |
| 3596 | if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
| 3597 | F.hasFnAttribute(Kind: Attribute::SanitizeAddress)) |
| 3598 | KnowInitUndef = false; |
| 3599 | |
| 3600 | auto Removable = |
| 3601 | isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef); |
| 3602 | if (Removable) { |
| 3603 | for (WeakTrackingVH &User : Users) { |
| 3604 | // Lower all @llvm.objectsize and MTI calls first, because they may use
| 3605 | // a bitcast/GEP of the alloca we are removing.
| 3606 | if (!User) |
| 3607 | continue; |
| 3608 | |
| 3609 | Instruction *I = cast<Instruction>(Val: &*User); |
| 3610 | |
| 3611 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
| 3612 | if (II->getIntrinsicID() == Intrinsic::objectsize) { |
| 3613 | SmallVector<Instruction *> InsertedInstructions; |
| 3614 | Value *Result = lowerObjectSizeCall( |
| 3615 | ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions); |
| 3616 | for (Instruction *Inserted : InsertedInstructions) |
| 3617 | Worklist.add(I: Inserted); |
| 3618 | replaceInstUsesWith(I&: *I, V: Result); |
| 3619 | eraseInstFromFunction(I&: *I); |
| 3620 | User = nullptr; // Skip examining in the next loop. |
| 3621 | continue; |
| 3622 | } |
| 3623 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) { |
| 3624 | if (KnowInitZero && isRefSet(MRI: *Removable)) { |
| 3625 | IRBuilderBase::InsertPointGuard Guard(Builder); |
| 3626 | Builder.SetInsertPoint(MTI); |
| 3627 | auto *M = Builder.CreateMemSet( |
| 3628 | Ptr: MTI->getRawDest(), |
| 3629 | Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0), |
| 3630 | Size: MTI->getLength(), Align: MTI->getDestAlign()); |
| 3631 | M->copyMetadata(SrcInst: *MTI); |
| 3632 | } |
| 3633 | } |
| 3634 | } |
| 3635 | } |
| 3636 | for (WeakTrackingVH &User : Users) { |
| 3637 | if (!User) |
| 3638 | continue; |
| 3639 | |
| 3640 | Instruction *I = cast<Instruction>(Val: &*User); |
| 3641 | |
| 3642 | if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) { |
| 3643 | replaceInstUsesWith(I&: *C, |
| 3644 | V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()), |
| 3645 | V: C->isFalseWhenEqual())); |
| 3646 | } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
| 3647 | for (auto *DVI : DVIs) |
| 3648 | if (DVI->isAddressOfVariable()) |
| 3649 | ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB); |
| 3650 | for (auto *DVR : DVRs) |
| 3651 | if (DVR->isAddressOfVariable()) |
| 3652 | ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB); |
| 3653 | } else { |
| 3654 | // Casts, GEP, or anything else: we're about to delete this instruction, |
| 3655 | // so it can not have any valid uses. |
| 3656 | Constant *Replace; |
| 3657 | if (isa<LoadInst>(Val: I)) { |
| 3658 | assert(KnowInitZero || KnowInitUndef); |
| 3659 | Replace = KnowInitUndef ? UndefValue::get(T: I->getType()) |
| 3660 | : Constant::getNullValue(Ty: I->getType()); |
| 3661 | } else |
| 3662 | Replace = PoisonValue::get(T: I->getType()); |
| 3663 | replaceInstUsesWith(I&: *I, V: Replace); |
| 3664 | } |
| 3665 | eraseInstFromFunction(I&: *I); |
| 3666 | } |
| 3667 | |
| 3668 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) { |
| 3669 | // Replace invoke with a NOP intrinsic to maintain the original CFG |
| 3670 | Module *M = II->getModule(); |
| 3671 | Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing); |
| 3672 | auto *NewII = InvokeInst::Create( |
| 3673 | Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "" , InsertBefore: II->getParent()); |
| 3674 | NewII->setDebugLoc(II->getDebugLoc()); |
| 3675 | } |
| 3676 | |
| 3677 | // Remove debug intrinsics which describe the value contained within the |
| 3678 | // alloca. In addition to removing dbg.{declare,addr} which simply point to |
| 3679 | // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: |
| 3680 | // |
| 3681 | // ``` |
| 3682 | // define void @foo(i32 %0) { |
| 3683 | // %a = alloca i32 ; Deleted. |
| 3684 | // store i32 %0, i32* %a |
| 3685 | // dbg.value(i32 %0, "arg0") ; Not deleted. |
| 3686 | // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. |
| 3687 | // call void @trivially_inlinable_no_op(i32* %a) |
| 3688 | // ret void |
| 3689 | // } |
| 3690 | // ``` |
| 3691 | // |
| 3692 | // This may not be required if we stop describing the contents of allocas |
| 3693 | // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in |
| 3694 | // the LowerDbgDeclare utility. |
| 3695 | // |
| 3696 | // If there is a dead store to `%a` in @trivially_inlinable_no_op, the |
| 3697 | // "arg0" dbg.value may be stale after the call. However, failing to remove |
| 3698 | // the DW_OP_deref dbg.value causes large gaps in location coverage. |
| 3699 | // |
| 3700 | // FIXME: the Assignment Tracking project has now likely made this |
| 3701 | // redundant (and it's sometimes harmful). |
| 3702 | for (auto *DVI : DVIs) |
| 3703 | if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) |
| 3704 | DVI->eraseFromParent(); |
| 3705 | for (auto *DVR : DVRs) |
| 3706 | if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref()) |
| 3707 | DVR->eraseFromParent(); |
| 3708 | |
| 3709 | return eraseInstFromFunction(I&: MI); |
| 3710 | } |
| 3711 | return nullptr; |
| 3712 | } |
| 3713 | |
| 3714 | /// Move the call to free before a NULL test. |
| 3715 | /// |
/// Check whether this free is reached only after its argument has been tested
/// against NULL (property 0).
/// If so, it is legal to move this call to its predecessor block.
| 3719 | /// |
| 3720 | /// The move is performed only if the block containing the call to free |
| 3721 | /// will be removed, i.e.: |
| 3722 | /// 1. it has only one predecessor P, and P has two successors |
| 3723 | /// 2. it contains the call, noops, and an unconditional branch |
| 3724 | /// 3. its successor is the same as its predecessor's successor |
| 3725 | /// |
/// Profitability is not a concern here; this function should be called only
/// if the caller already knows this transformation would be profitable
/// (e.g., for code size).
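///
/// For example (an illustrative sketch, value names made up):
///   entry:
///     %isnull = icmp eq ptr %p, null
///     br i1 %isnull, label %end, label %free_bb
///   free_bb:
///     call void @free(ptr %p)
///     br label %end
/// becomes:
///   entry:
///     %isnull = icmp eq ptr %p, null
///     call void @free(ptr %p)
///     br i1 %isnull, label %end, label %free_bb
///   free_bb:
///     br label %end
/// leaving free_bb empty for SimplifyCFG to remove later.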
| 3729 | static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, |
| 3730 | const DataLayout &DL) { |
| 3731 | Value *Op = FI.getArgOperand(i: 0); |
| 3732 | BasicBlock *FreeInstrBB = FI.getParent(); |
| 3733 | BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); |
| 3734 | |
| 3735 | // Validate part of constraint #1: Only one predecessor |
// FIXME: We could allow more than one predecessor, but in that case we
// would have to duplicate the call to free in each predecessor, which may
// not be profitable even for code size.
| 3739 | if (!PredBB) |
| 3740 | return nullptr; |
| 3741 | |
// Validate constraint #2: Does this block contain only the call to
| 3743 | // free, noops, and an unconditional branch? |
| 3744 | BasicBlock *SuccBB; |
| 3745 | Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); |
| 3746 | if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB))) |
| 3747 | return nullptr; |
| 3748 | |
// If there are only 2 instructions in the block, at this point they must be
// the call to free and the unconditional branch.
| 3751 | // If there are more than 2 instructions, check that they are noops |
| 3752 | // i.e., they won't hurt the performance of the generated code. |
| 3753 | if (FreeInstrBB->size() != 2) { |
| 3754 | for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { |
| 3755 | if (&Inst == &FI || &Inst == FreeInstrBBTerminator) |
| 3756 | continue; |
| 3757 | auto *Cast = dyn_cast<CastInst>(Val: &Inst); |
| 3758 | if (!Cast || !Cast->isNoopCast(DL)) |
| 3759 | return nullptr; |
| 3760 | } |
| 3761 | } |
| 3762 | // Validate the rest of constraint #1 by matching on the pred branch. |
| 3763 | Instruction *TI = PredBB->getTerminator(); |
| 3764 | BasicBlock *TrueBB, *FalseBB; |
| 3765 | CmpPredicate Pred; |
| 3766 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, |
| 3767 | L: m_CombineOr(L: m_Specific(V: Op), |
| 3768 | R: m_Specific(V: Op->stripPointerCasts())), |
| 3769 | R: m_Zero()), |
| 3770 | T&: TrueBB, F&: FalseBB))) |
| 3771 | return nullptr; |
| 3772 | if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
| 3773 | return nullptr; |
| 3774 | |
| 3775 | // Validate constraint #3: Ensure the null case just falls through. |
| 3776 | if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) |
| 3777 | return nullptr; |
| 3778 | assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && |
| 3779 | "Broken CFG: missing edge from predecessor to successor" ); |
| 3780 | |
| 3781 | // At this point, we know that everything in FreeInstrBB can be moved |
| 3782 | // before TI. |
| 3783 | for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) { |
| 3784 | if (&Instr == FreeInstrBBTerminator) |
| 3785 | break; |
| 3786 | Instr.moveBeforePreserving(MovePos: TI->getIterator()); |
| 3787 | } |
| 3788 | assert(FreeInstrBB->size() == 1 && |
| 3789 | "Only the branch instruction should remain" ); |
| 3790 | |
| 3791 | // Now that we've moved the call to free before the NULL check, we have to |
| 3792 | // remove any attributes on its parameter that imply it's non-null, because |
| 3793 | // those attributes might have only been valid because of the NULL check, and |
| 3794 | // we can get miscompiles if we keep them. This is conservative if non-null is |
| 3795 | // also implied by something other than the NULL check, but it's guaranteed to |
| 3796 | // be correct, and the conservativeness won't matter in practice, since the |
| 3797 | // attributes are irrelevant for the call to free itself and the pointer |
| 3798 | // shouldn't be used after the call. |
| 3799 | AttributeList Attrs = FI.getAttributes(); |
| 3800 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull); |
| 3801 | Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable); |
| 3802 | if (Dereferenceable.isValid()) { |
| 3803 | uint64_t Bytes = Dereferenceable.getDereferenceableBytes(); |
| 3804 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, |
| 3805 | Kind: Attribute::Dereferenceable); |
| 3806 | Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes); |
| 3807 | } |
| 3808 | FI.setAttributes(Attrs); |
| 3809 | |
| 3810 | return &FI; |
| 3811 | } |
| 3812 | |
| 3813 | Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { |
| 3814 | // free undef -> unreachable. |
| 3815 | if (isa<UndefValue>(Val: Op)) { |
| 3816 | // Leave a marker since we can't modify the CFG here. |
| 3817 | CreateNonTerminatorUnreachable(InsertAt: &FI); |
| 3818 | return eraseInstFromFunction(I&: FI); |
| 3819 | } |
| 3820 | |
| 3821 | // If we have 'free null' delete the instruction. This can happen in stl code |
| 3822 | // when lots of inlining happens. |
| 3823 | if (isa<ConstantPointerNull>(Val: Op)) |
| 3824 | return eraseInstFromFunction(I&: FI); |
| 3825 | |
| 3826 | // If we had free(realloc(...)) with no intervening uses, then eliminate the |
| 3827 | // realloc() entirely. |
| 3828 | CallInst *CI = dyn_cast<CallInst>(Val: Op); |
| 3829 | if (CI && CI->hasOneUse()) |
| 3830 | if (Value *ReallocatedOp = getReallocatedOperand(CB: CI)) |
| 3831 | return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp)); |
| 3832 | |
| 3833 | // If we optimize for code size, try to move the call to free before the null |
// test so that SimplifyCFG can remove the empty block and dead code
// elimination can remove the branch. That is, this helps turn something like:
| 3836 | // if (foo) free(foo); |
| 3837 | // into |
| 3838 | // free(foo); |
| 3839 | // |
| 3840 | // Note that we can only do this for 'free' and not for any flavor of |
| 3841 | // 'operator delete'; there is no 'operator delete' symbol for which we are |
| 3842 | // permitted to invent a call, even if we're passing in a null pointer. |
| 3843 | if (MinimizeSize) { |
| 3844 | LibFunc Func; |
| 3845 | if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free) |
| 3846 | if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL)) |
| 3847 | return I; |
| 3848 | } |
| 3849 | |
| 3850 | return nullptr; |
| 3851 | } |
| 3852 | |
| 3853 | Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { |
| 3854 | Value *RetVal = RI.getReturnValue(); |
| 3855 | if (!RetVal) |
| 3856 | return nullptr; |
| 3857 | |
| 3858 | Function *F = RI.getFunction(); |
| 3859 | Type *RetTy = RetVal->getType(); |
| 3860 | if (RetTy->isPointerTy()) { |
| 3861 | bool HasDereferenceable = |
| 3862 | F->getAttributes().getRetDereferenceableBytes() > 0; |
| 3863 | if (F->hasRetAttribute(Kind: Attribute::NonNull) || |
| 3864 | (HasDereferenceable && |
| 3865 | !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) { |
| 3866 | if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable)) |
| 3867 | return replaceOperand(I&: RI, OpNum: 0, V); |
| 3868 | } |
| 3869 | } |
| 3870 | |
| 3871 | if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy)) |
| 3872 | return nullptr; |
| 3873 | |
| 3874 | FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); |
| 3875 | if (ReturnClass == fcNone) |
| 3876 | return nullptr; |
| 3877 | |
| 3878 | KnownFPClass KnownClass; |
| 3879 | Value *Simplified = |
| 3880 | SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, CxtI: &RI); |
| 3881 | if (!Simplified) |
| 3882 | return nullptr; |
| 3883 | |
| 3884 | return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified); |
| 3885 | } |
| 3886 | |
| 3887 | // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! |
| 3888 | bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) { |
| 3889 | // Try to remove the previous instruction if it must lead to unreachable. |
| 3890 | // This includes instructions like stores and "llvm.assume" that may not get |
| 3891 | // removed by simple dead code elimination. |
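//
// For example (illustrative IR), given a block that ends with:
//   store i32 1, ptr %p
//   call void @llvm.assume(i1 %c)
//   unreachable
// both the store and the assume are guaranteed to transfer execution to their
// successor, so both can be erased here.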
| 3892 | bool Changed = false; |
| 3893 | while (Instruction *Prev = I.getPrevNonDebugInstruction()) { |
| 3894 | // While we theoretically can erase EH, that would result in a block that |
| 3895 | // used to start with an EH no longer starting with EH, which is invalid. |
| 3896 | // To make it valid, we'd need to fixup predecessors to no longer refer to |
| 3897 | // this block, but that changes CFG, which is not allowed in InstCombine. |
| 3898 | if (Prev->isEHPad()) |
| 3899 | break; // Can not drop any more instructions. We're done here. |
| 3900 | |
| 3901 | if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev)) |
| 3902 | break; // Can not drop any more instructions. We're done here. |
| 3903 | // Otherwise, this instruction can be freely erased, |
| 3904 | // even if it is not side-effect free. |
| 3905 | |
| 3906 | // A value may still have uses before we process it here (for example, in |
| 3907 | // another unreachable block), so convert those to poison. |
| 3908 | replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType())); |
| 3909 | eraseInstFromFunction(I&: *Prev); |
| 3910 | Changed = true; |
| 3911 | } |
| 3912 | return Changed; |
| 3913 | } |
| 3914 | |
| 3915 | Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { |
| 3916 | removeInstructionsBeforeUnreachable(I); |
| 3917 | return nullptr; |
| 3918 | } |
| 3919 | |
| 3920 | Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { |
| 3921 | assert(BI.isUnconditional() && "Only for unconditional branches." ); |
| 3922 | |
| 3923 | // If this store is the second-to-last instruction in the basic block |
| 3924 | // (excluding debug info) and if the block ends with |
| 3925 | // an unconditional branch, try to move the store to the successor block. |
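//
// For example (an illustrative sketch of what mergeStoreIntoSuccessor does):
//   then:
//     store i32 %a, ptr %p
//     br label %join
//   else:
//     store i32 %b, ptr %p
//     br label %join
// can become a single store of a phi in the successor:
//   join:
//     %v = phi i32 [ %a, %then ], [ %b, %else ]
//     store i32 %v, ptr %p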
| 3926 | |
| 3927 | auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { |
| 3928 | BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); |
| 3929 | do { |
| 3930 | if (BBI != FirstInstr) |
| 3931 | --BBI; |
| 3932 | } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst()); |
| 3933 | |
| 3934 | return dyn_cast<StoreInst>(Val&: BBI); |
| 3935 | }; |
| 3936 | |
| 3937 | if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) |
| 3938 | if (mergeStoreIntoSuccessor(SI&: *SI)) |
| 3939 | return &BI; |
| 3940 | |
| 3941 | return nullptr; |
| 3942 | } |
| 3943 | |
| 3944 | void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, |
| 3945 | SmallVectorImpl<BasicBlock *> &Worklist) { |
| 3946 | if (!DeadEdges.insert(V: {From, To}).second) |
| 3947 | return; |
| 3948 | |
| 3949 | // Replace phi node operands in successor with poison. |
| 3950 | for (PHINode &PN : To->phis()) |
| 3951 | for (Use &U : PN.incoming_values()) |
| 3952 | if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) { |
| 3953 | replaceUse(U, NewValue: PoisonValue::get(T: PN.getType())); |
| 3954 | addToWorklist(I: &PN); |
| 3955 | MadeIRChange = true; |
| 3956 | } |
| 3957 | |
| 3958 | Worklist.push_back(Elt: To); |
| 3959 | } |
| 3960 | |
| 3961 | // Under the assumption that I is unreachable, remove it and following |
| 3962 | // instructions. Changes are reported directly to MadeIRChange. |
| 3963 | void InstCombinerImpl::handleUnreachableFrom( |
| 3964 | Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) { |
| 3965 | BasicBlock *BB = I->getParent(); |
| 3966 | for (Instruction &Inst : make_early_inc_range( |
| 3967 | Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()), |
| 3968 | y: std::next(x: I->getReverseIterator())))) { |
| 3969 | if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { |
| 3970 | replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType())); |
| 3971 | MadeIRChange = true; |
| 3972 | } |
| 3973 | if (Inst.isEHPad() || Inst.getType()->isTokenTy()) |
| 3974 | continue; |
| 3975 | // RemoveDIs: erase debug-info on this instruction manually. |
| 3976 | Inst.dropDbgRecords(); |
| 3977 | eraseInstFromFunction(I&: Inst); |
| 3978 | MadeIRChange = true; |
| 3979 | } |
| 3980 | |
| 3981 | SmallVector<Value *> Changed; |
| 3982 | if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) { |
| 3983 | MadeIRChange = true; |
| 3984 | for (Value *V : Changed) |
| 3985 | addToWorklist(I: cast<Instruction>(Val: V)); |
| 3986 | } |
| 3987 | |
| 3988 | // Handle potentially dead successors. |
| 3989 | for (BasicBlock *Succ : successors(BB)) |
| 3990 | addDeadEdge(From: BB, To: Succ, Worklist); |
| 3991 | } |
| 3992 | |
| 3993 | void InstCombinerImpl::handlePotentiallyDeadBlocks( |
| 3994 | SmallVectorImpl<BasicBlock *> &Worklist) { |
| 3995 | while (!Worklist.empty()) { |
| 3996 | BasicBlock *BB = Worklist.pop_back_val(); |
| 3997 | if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
| 3998 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
| 3999 | })) |
| 4000 | continue; |
| 4001 | |
| 4002 | handleUnreachableFrom(I: &BB->front(), Worklist); |
| 4003 | } |
| 4004 | } |
| 4005 | |
| 4006 | void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, |
| 4007 | BasicBlock *LiveSucc) { |
| 4008 | SmallVector<BasicBlock *> Worklist; |
| 4009 | for (BasicBlock *Succ : successors(BB)) { |
| 4010 | // The live successor isn't dead. |
| 4011 | if (Succ == LiveSucc) |
| 4012 | continue; |
| 4013 | |
| 4014 | addDeadEdge(From: BB, To: Succ, Worklist); |
| 4015 | } |
| 4016 | |
| 4017 | handlePotentiallyDeadBlocks(Worklist); |
| 4018 | } |
| 4019 | |
| 4020 | Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { |
| 4021 | if (BI.isUnconditional()) |
| 4022 | return visitUnconditionalBranchInst(BI); |
| 4023 | |
| 4024 | // Change br (not X), label True, label False to: br X, label False, True |
| 4025 | Value *Cond = BI.getCondition(); |
| 4026 | Value *X; |
| 4027 | if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) { |
| 4028 | // Swap Destinations and condition... |
| 4029 | BI.swapSuccessors(); |
| 4030 | if (BPI) |
| 4031 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
| 4032 | return replaceOperand(I&: BI, OpNum: 0, V: X); |
| 4033 | } |
| 4034 | |
| 4035 | // Canonicalize logical-and-with-invert as logical-or-with-invert. |
| 4036 | // This is done by inverting the condition and swapping successors: |
| 4037 | // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T |
| 4038 | Value *Y; |
| 4039 | if (isa<SelectInst>(Val: Cond) && |
| 4040 | match(V: Cond, |
| 4041 | P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) { |
| 4042 | Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName()); |
| 4043 | Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y); |
| 4044 | BI.swapSuccessors(); |
| 4045 | if (BPI) |
| 4046 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
| 4047 | return replaceOperand(I&: BI, OpNum: 0, V: Or); |
| 4048 | } |
| 4049 | |
| 4050 | // If the condition is irrelevant, remove the use so that other |
| 4051 | // transforms on the condition become more effective. |
| 4052 | if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1)) |
| 4053 | return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType())); |
| 4054 | |
| 4055 | // Canonicalize, for example, fcmp_one -> fcmp_oeq. |
| 4056 | CmpPredicate Pred; |
| 4057 | if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) && |
| 4058 | !isCanonicalPredicate(Pred)) { |
| 4059 | // Swap destinations and condition. |
| 4060 | auto *Cmp = cast<CmpInst>(Val: Cond); |
| 4061 | Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
| 4062 | BI.swapSuccessors(); |
| 4063 | if (BPI) |
| 4064 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
| 4065 | Worklist.push(I: Cmp); |
| 4066 | return &BI; |
| 4067 | } |
| 4068 | |
| 4069 | if (isa<UndefValue>(Val: Cond)) { |
| 4070 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr); |
| 4071 | return nullptr; |
| 4072 | } |
| 4073 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
| 4074 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), |
| 4075 | LiveSucc: BI.getSuccessor(i: !CI->getZExtValue())); |
| 4076 | return nullptr; |
| 4077 | } |
| 4078 | |
| 4079 | // Replace all dominated uses of the condition with true/false |
| 4080 | // Ignore constant expressions to avoid iterating over uses on other |
| 4081 | // functions. |
| 4082 | if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) { |
| 4083 | for (auto &U : make_early_inc_range(Range: Cond->uses())) { |
| 4084 | BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0)); |
| 4085 | if (DT.dominates(BBE: Edge0, U)) { |
| 4086 | replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType())); |
| 4087 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
| 4088 | continue; |
| 4089 | } |
| 4090 | BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1)); |
| 4091 | if (DT.dominates(BBE: Edge1, U)) { |
| 4092 | replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType())); |
| 4093 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
| 4094 | } |
| 4095 | } |
| 4096 | } |
| 4097 | |
| 4098 | DC.registerBranch(BI: &BI); |
| 4099 | return nullptr; |
| 4100 | } |
| 4101 | |
| 4102 | // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if |
| 4103 | // we can prove that both (switch C) and (switch X) go to the default when cond |
| 4104 | // is false/true. |
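//
// For example (illustrative IR):
//   %cond = icmp ult i32 %x, 4
//   %sel  = select i1 %cond, i32 %x, i32 10
//   switch i32 %sel, label %default [ ... cases 0, 1, 2, 3 ... ]
// The constant arm 10 maps to %default, and whenever %cond is false %x u>= 4,
// so none of the cases can match %x either; the switch may use %x directly.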
| 4105 | static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI, |
| 4106 | SelectInst *Select, |
| 4107 | bool IsTrueArm) { |
| 4108 | unsigned CstOpIdx = IsTrueArm ? 1 : 2; |
| 4109 | auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx)); |
| 4110 | if (!C) |
| 4111 | return nullptr; |
| 4112 | |
| 4113 | BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor(); |
| 4114 | if (CstBB != SI.getDefaultDest()) |
| 4115 | return nullptr; |
| 4116 | Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx); |
| 4117 | CmpPredicate Pred; |
| 4118 | const APInt *RHSC; |
| 4119 | if (!match(V: Select->getCondition(), |
| 4120 | P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC)))) |
| 4121 | return nullptr; |
| 4122 | if (IsTrueArm) |
| 4123 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
| 4124 | |
| 4125 | // See whether we can replace the select with X |
| 4126 | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC); |
| 4127 | for (auto Case : SI.cases()) |
| 4128 | if (!CR.contains(Val: Case.getCaseValue()->getValue())) |
| 4129 | return nullptr; |
| 4130 | |
| 4131 | return X; |
| 4132 | } |
| 4133 | |
| 4134 | Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { |
| 4135 | Value *Cond = SI.getCondition(); |
| 4136 | Value *Op0; |
| 4137 | ConstantInt *AddRHS; |
| 4138 | if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) { |
| 4139 | // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. |
| 4140 | for (auto Case : SI.cases()) { |
| 4141 | Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS); |
| 4142 | assert(isa<ConstantInt>(NewCase) && |
| 4143 | "Result of expression should be constant" ); |
| 4144 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
| 4145 | } |
| 4146 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
| 4147 | } |
| 4148 | |
| 4149 | ConstantInt *SubLHS; |
| 4150 | if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) { |
| 4151 | // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. |
| 4152 | for (auto Case : SI.cases()) { |
| 4153 | Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue()); |
| 4154 | assert(isa<ConstantInt>(NewCase) && |
| 4155 | "Result of expression should be constant" ); |
| 4156 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
| 4157 | } |
| 4158 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
| 4159 | } |
| 4160 | |
| 4161 | uint64_t ShiftAmt; |
| 4162 | if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) && |
| 4163 | ShiftAmt < Op0->getType()->getScalarSizeInBits() && |
| 4164 | all_of(Range: SI.cases(), P: [&](const auto &Case) { |
| 4165 | return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; |
| 4166 | })) { |
| 4167 | // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. |
| 4168 | OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond); |
| 4169 | if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || |
| 4170 | Shl->hasOneUse()) { |
| 4171 | Value *NewCond = Op0; |
| 4172 | if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { |
| 4173 | // If the shift may wrap, we need to mask off the shifted bits. |
| 4174 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
| 4175 | NewCond = Builder.CreateAnd( |
| 4176 | LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt)); |
| 4177 | } |
| 4178 | for (auto Case : SI.cases()) { |
| 4179 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
| 4180 | APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) |
| 4181 | : CaseVal.lshr(shiftAmt: ShiftAmt); |
| 4182 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase)); |
| 4183 | } |
| 4184 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
| 4185 | } |
| 4186 | } |
| 4187 | |
| 4188 | // Fold switch(zext/sext(X)) into switch(X) if possible. |
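// For example, 'switch (zext i8 %x to i32) case 7:' can become
// 'switch i8 %x case 7:' when, as checked below, every case value fits in the
// source type.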
| 4189 | if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) { |
| 4190 | bool IsZExt = isa<ZExtInst>(Val: Cond); |
| 4191 | Type *SrcTy = Op0->getType(); |
| 4192 | unsigned NewWidth = SrcTy->getScalarSizeInBits(); |
| 4193 | |
| 4194 | if (all_of(Range: SI.cases(), P: [&](const auto &Case) { |
| 4195 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
| 4196 | return IsZExt ? CaseVal.isIntN(N: NewWidth) |
| 4197 | : CaseVal.isSignedIntN(N: NewWidth); |
| 4198 | })) { |
| 4199 | for (auto &Case : SI.cases()) { |
| 4200 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
| 4201 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
| 4202 | } |
| 4203 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
| 4204 | } |
| 4205 | } |
| 4206 | |
| 4207 | // Fold switch(select cond, X, Y) into switch(X/Y) if possible |
| 4208 | if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) { |
| 4209 | if (Value *V = |
| 4210 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true)) |
| 4211 | return replaceOperand(I&: SI, OpNum: 0, V); |
| 4212 | if (Value *V = |
| 4213 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false)) |
| 4214 | return replaceOperand(I&: SI, OpNum: 0, V); |
| 4215 | } |
| 4216 | |
| 4217 | KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI); |
| 4218 | unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); |
| 4219 | unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); |
| 4220 | |
| 4221 | // Compute the number of leading bits we can ignore. |
| 4222 | // TODO: A better way to determine this would use ComputeNumSignBits(). |
| 4223 | for (const auto &C : SI.cases()) { |
| 4224 | LeadingKnownZeros = |
| 4225 | std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero()); |
| 4226 | LeadingKnownOnes = |
| 4227 | std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one()); |
| 4228 | } |
| 4229 | |
| 4230 | unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes); |
| 4231 | |
| 4232 | // Shrink the condition operand if the new type is smaller than the old type. |
| 4233 | // But do not shrink to a non-standard type, because backend can't generate |
| 4234 | // good code for that yet. |
| 4235 | // TODO: We can make it aggressive again after fixing PR39569. |
| 4236 | if (NewWidth > 0 && NewWidth < Known.getBitWidth() && |
| 4237 | shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) { |
| 4238 | IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth); |
| 4239 | Builder.SetInsertPoint(&SI); |
| 4240 | Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc" ); |
| 4241 | |
| 4242 | for (auto Case : SI.cases()) { |
| 4243 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
| 4244 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
| 4245 | } |
| 4246 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
| 4247 | } |
| 4248 | |
| 4249 | if (isa<UndefValue>(Val: Cond)) { |
| 4250 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr); |
| 4251 | return nullptr; |
| 4252 | } |
| 4253 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
| 4254 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), |
| 4255 | LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor()); |
| 4256 | return nullptr; |
| 4257 | } |
| 4258 | |
| 4259 | return nullptr; |
| 4260 | } |
| 4261 | |
| 4262 | Instruction * |
InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
| 4264 | auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand()); |
| 4265 | if (!WO) |
| 4266 | return nullptr; |
| 4267 | |
| 4268 | Intrinsic::ID OvID = WO->getIntrinsicID(); |
| 4269 | const APInt *C = nullptr; |
| 4270 | if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) { |
| 4271 | if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow || |
| 4272 | OvID == Intrinsic::umul_with_overflow)) { |
| 4273 | // extractvalue (any_mul_with_overflow X, -1), 0 --> -X |
| 4274 | if (C->isAllOnes()) |
| 4275 | return BinaryOperator::CreateNeg(Op: WO->getLHS()); |
| 4276 | // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n |
| 4277 | if (C->isPowerOf2()) { |
| 4278 | return BinaryOperator::CreateShl( |
| 4279 | V1: WO->getLHS(), |
| 4280 | V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2())); |
| 4281 | } |
| 4282 | } |
| 4283 | } |
| 4284 | |
| 4285 | // We're extracting from an overflow intrinsic. See if we're the only user. |
| 4286 | // That allows us to simplify multiple result intrinsics to simpler things |
| 4287 | // that just get one value. |
| 4288 | if (!WO->hasOneUse()) |
| 4289 | return nullptr; |
| 4290 | |
| 4291 | // Check if we're grabbing only the result of a 'with overflow' intrinsic |
| 4292 | // and replace it with a traditional binary instruction. |
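// For example (illustrative IR):
//   %agg = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %res = extractvalue { i32, i1 } %agg, 0
// becomes a plain 'add i32 %x, %y' once the intrinsic has no other users.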
| 4293 | if (*EV.idx_begin() == 0) { |
| 4294 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
| 4295 | Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); |
| 4296 | // Replace the old instruction's uses with poison. |
| 4297 | replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType())); |
| 4298 | eraseInstFromFunction(I&: *WO); |
| 4299 | return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS); |
| 4300 | } |
| 4301 | |
| 4302 | assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst" ); |
| 4303 | |
| 4304 | // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. |
| 4305 | if (OvID == Intrinsic::usub_with_overflow) |
| 4306 | return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); |
| 4307 | |
| 4308 | // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but |
| 4309 | // +1 is not possible because we assume signed values. |
| 4310 | if (OvID == Intrinsic::smul_with_overflow && |
| 4311 | WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
| 4312 | return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS()); |
| 4313 | |
| 4314 | // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1 |
| 4315 | if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) { |
| 4316 | unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits(); |
| 4317 | // Only handle even bitwidths for performance reasons. |
| 4318 | if (BitWidth % 2 == 0) |
| 4319 | return new ICmpInst( |
| 4320 | ICmpInst::ICMP_UGT, WO->getLHS(), |
| 4321 | ConstantInt::get(Ty: WO->getLHS()->getType(), |
| 4322 | V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2))); |
| 4323 | } |
| 4324 | |
| 4325 | // If only the overflow result is used, and the right hand side is a |
| 4326 | // constant (or constant splat), we can remove the intrinsic by directly |
| 4327 | // checking for overflow. |
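// For example (illustrative, using i8 for brevity):
//   %agg = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 100)
//   %ov  = extractvalue { i8, i1 } %agg, 1
// overflows exactly when %x s> 27, so %ov can become 'icmp sgt i8 %x, 27'.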
| 4328 | if (C) { |
| 4329 | // Compute the no-wrap range for LHS given RHS=C, then construct an |
| 4330 | // equivalent icmp, potentially using an offset. |
| 4331 | ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( |
| 4332 | BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind()); |
| 4333 | |
| 4334 | CmpInst::Predicate Pred; |
| 4335 | APInt NewRHSC, Offset; |
| 4336 | NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset); |
| 4337 | auto *OpTy = WO->getRHS()->getType(); |
| 4338 | auto *NewLHS = WO->getLHS(); |
| 4339 | if (Offset != 0) |
| 4340 | NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset)); |
| 4341 | return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS, |
| 4342 | ConstantInt::get(Ty: OpTy, V: NewRHSC)); |
| 4343 | } |
| 4344 | |
| 4345 | return nullptr; |
| 4346 | } |
| 4347 | |
static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
| 4349 | SelectInst *SelectInst, |
| 4350 | InstCombiner::BuilderTy &Builder) { |
| 4351 | // Helper to fold frexp of select to select of frexp. |
| 4352 | |
| 4353 | if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse()) |
| 4354 | return nullptr; |
| 4355 | Value *Cond = SelectInst->getCondition(); |
| 4356 | Value *TrueVal = SelectInst->getTrueValue(); |
| 4357 | Value *FalseVal = SelectInst->getFalseValue(); |
| 4358 | |
| 4359 | const APFloat *ConstVal = nullptr; |
| 4360 | Value *VarOp = nullptr; |
| 4361 | bool ConstIsTrue = false; |
| 4362 | |
| 4363 | if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) { |
| 4364 | VarOp = FalseVal; |
| 4365 | ConstIsTrue = true; |
| 4366 | } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) { |
| 4367 | VarOp = TrueVal; |
| 4368 | ConstIsTrue = false; |
| 4369 | } else { |
| 4370 | return nullptr; |
| 4371 | } |
| 4372 | |
| 4373 | Builder.SetInsertPoint(&EV); |
| 4374 | |
| 4375 | CallInst *NewFrexp = |
| 4376 | Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp" ); |
| 4377 | NewFrexp->copyIRFlags(V: FrexpCall); |
| 4378 | |
| 4379 | Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa" ); |
| 4380 | |
| 4381 | int Exp; |
| 4382 | APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven); |
| 4383 | |
| 4384 | Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa); |
| 4385 | |
| 4386 | Value *NewSel = Builder.CreateSelectFMF( |
| 4387 | C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV, |
| 4388 | False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp" ); |
| 4389 | return NewSel; |
| 4390 | } |
Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
| 4392 | Value *Agg = EV.getAggregateOperand(); |
| 4393 | |
| 4394 | if (!EV.hasIndices()) |
| 4395 | return replaceInstUsesWith(I&: EV, V: Agg); |
| 4396 | |
| 4397 | if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(), |
| 4398 | Q: SQ.getWithInstruction(I: &EV))) |
| 4399 | return replaceInstUsesWith(I&: EV, V); |
| 4400 | |
| 4401 | Value *Cond, *TrueVal, *FalseVal; |
| 4402 | if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select( |
| 4403 | C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) { |
| 4404 | auto *SelInst = |
| 4405 | cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0)); |
| 4406 | if (Value *Result = |
| 4407 | foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder)) |
| 4408 | return replaceInstUsesWith(I&: EV, V: Result); |
| 4409 | } |
| 4410 | if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) { |
| 4411 | // We're extracting from an insertvalue instruction, compare the indices |
| 4412 | const unsigned *exti, *exte, *insi, *inse; |
| 4413 | for (exti = EV.idx_begin(), insi = IV->idx_begin(), |
| 4414 | exte = EV.idx_end(), inse = IV->idx_end(); |
| 4415 | exti != exte && insi != inse; |
| 4416 | ++exti, ++insi) { |
| 4417 | if (*insi != *exti) |
| 4418 | // The insert and extract both reference distinctly different elements. |
| 4419 | // This means the extract is not influenced by the insert, and we can |
| 4420 | // replace the aggregate operand of the extract with the aggregate |
| 4421 | // operand of the insert. i.e., replace |
| 4422 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
| 4423 | // %E = extractvalue { i32, { i32 } } %I, 0 |
| 4424 | // with |
| 4425 | // %E = extractvalue { i32, { i32 } } %A, 0 |
| 4426 | return ExtractValueInst::Create(Agg: IV->getAggregateOperand(), |
| 4427 | Idxs: EV.getIndices()); |
| 4428 | } |
| 4429 | if (exti == exte && insi == inse) |
| 4430 | // Both iterators are at the end: Index lists are identical. Replace |
| 4431 | // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
| 4432 | // %C = extractvalue { i32, { i32 } } %B, 1, 0 |
| 4433 | // with "i32 42" |
| 4434 | return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand()); |
| 4435 | if (exti == exte) { |
| 4436 | // The extract list is a prefix of the insert list. i.e. replace |
| 4437 | // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
| 4438 | // %E = extractvalue { i32, { i32 } } %I, 1 |
| 4439 | // with |
| 4440 | // %X = extractvalue { i32, { i32 } } %A, 1 |
| 4441 | // %E = insertvalue { i32 } %X, i32 42, 0 |
| 4442 | // by switching the order of the insert and extract (though the |
| 4443 | // insertvalue should be left in, since it may have other uses). |
| 4444 | Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(), |
| 4445 | Idxs: EV.getIndices()); |
| 4446 | return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(), |
| 4447 | Idxs: ArrayRef(insi, inse)); |
| 4448 | } |
| 4449 | if (insi == inse) |
| 4450 | // The insert list is a prefix of the extract list |
| 4451 | // We can simply remove the common indices from the extract and make it |
| 4452 | // operate on the inserted value instead of the insertvalue result. |
| 4453 | // i.e., replace |
| 4454 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
| 4455 | // %E = extractvalue { i32, { i32 } } %I, 1, 0 |
| 4456 | // with |
// %E = extractvalue { i32 } { i32 42 }, 0
| 4458 | return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(), |
| 4459 | Idxs: ArrayRef(exti, exte)); |
| 4460 | } |
| 4461 | |
| 4462 | if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) |
| 4463 | return R; |
| 4464 | |
| 4465 | if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) { |
// Bail out if the aggregate contains a scalable vector type
| 4467 | if (auto *STy = dyn_cast<StructType>(Val: Agg->getType()); |
| 4468 | STy && STy->isScalableTy()) |
| 4469 | return nullptr; |
| 4470 | |
| 4471 | // If the (non-volatile) load only has one use, we can rewrite this to a |
| 4472 | // load from a GEP. This reduces the size of the load. If a load is used |
| 4473 | // only by extractvalue instructions then this either must have been |
| 4474 | // optimized before, or it is a struct with padding, in which case we |
| 4475 | // don't want to do the transformation as it loses padding knowledge. |
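//
// For example (illustrative IR):
//   %agg = load { i32, i64 }, ptr %p
//   %x   = extractvalue { i32, i64 } %agg, 1
// becomes:
//   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
//   %x   = load i64, ptr %gep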
| 4476 | if (L->isSimple() && L->hasOneUse()) { |
| 4477 | // extractvalue has integer indices, getelementptr has Value*s. Convert. |
| 4478 | SmallVector<Value*, 4> Indices; |
| 4479 | // Prefix an i32 0 since we need the first element. |
| 4480 | Indices.push_back(Elt: Builder.getInt32(C: 0)); |
| 4481 | for (unsigned Idx : EV.indices()) |
| 4482 | Indices.push_back(Elt: Builder.getInt32(C: Idx)); |
| 4483 | |
| 4484 | // We need to insert these at the location of the old load, not at that of |
| 4485 | // the extractvalue. |
| 4486 | Builder.SetInsertPoint(L); |
| 4487 | Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(), |
| 4488 | Ptr: L->getPointerOperand(), IdxList: Indices); |
| 4489 | Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP); |
// Whatever aliasing information we had for the original load must also
| 4491 | // hold for the smaller load, so propagate the annotations. |
| 4492 | NL->setAAMetadata(L->getAAMetadata()); |
| 4493 | // Returning the load directly will cause the main loop to insert it in |
| 4494 | // the wrong spot, so use replaceInstUsesWith(). |
| 4495 | return replaceInstUsesWith(I&: EV, V: NL); |
| 4496 | } |
| 4497 | } |
| 4498 | |
| 4499 | if (auto *PN = dyn_cast<PHINode>(Val: Agg)) |
| 4500 | if (Instruction *Res = foldOpIntoPhi(I&: EV, PN)) |
| 4501 | return Res; |
| 4502 | |
| 4503 | // Canonicalize extract (select Cond, TV, FV) |
| 4504 | // -> select cond, (extract TV), (extract FV) |
| 4505 | if (auto *SI = dyn_cast<SelectInst>(Val: Agg)) |
| 4506 | if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true)) |
| 4507 | return R; |
| 4508 | |
| 4509 | // We could simplify extracts from other values. Note that nested extracts may |
| 4510 | // already be simplified implicitly by the above: extract (extract (insert) ) |
| 4511 | // will be translated into extract ( insert ( extract ) ) first and then just |
| 4512 | // the value inserted, if appropriate. Similarly for extracts from single-use |
| 4513 | // loads: extract (extract (load)) will be translated to extract (load (gep)) |
| 4514 | // and if again single-use then via load (gep (gep)) to load (gep). |
| 4515 | // However, double extracts from e.g. function arguments or return values |
| 4516 | // aren't handled yet. |
| 4517 | return nullptr; |
| 4518 | } |
| 4519 | |
| 4520 | /// Return 'true' if the given typeinfo will match anything. |
| 4521 | static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { |
| 4522 | switch (Personality) { |
| 4523 | case EHPersonality::GNU_C: |
| 4524 | case EHPersonality::GNU_C_SjLj: |
| 4525 | case EHPersonality::Rust: |
// The GCC C EH and Rust personalities only exist to support cleanups, so
| 4527 | // it's not clear what the semantics of catch clauses are. |
| 4528 | return false; |
| 4529 | case EHPersonality::Unknown: |
| 4530 | return false; |
| 4531 | case EHPersonality::GNU_Ada: |
| 4532 | // While __gnat_all_others_value will match any Ada exception, it doesn't |
| 4533 | // match foreign exceptions (or didn't, before gcc-4.7). |
| 4534 | return false; |
| 4535 | case EHPersonality::GNU_CXX: |
| 4536 | case EHPersonality::GNU_CXX_SjLj: |
| 4537 | case EHPersonality::GNU_ObjC: |
| 4538 | case EHPersonality::MSVC_X86SEH: |
| 4539 | case EHPersonality::MSVC_TableSEH: |
| 4540 | case EHPersonality::MSVC_CXX: |
| 4541 | case EHPersonality::CoreCLR: |
| 4542 | case EHPersonality::Wasm_CXX: |
| 4543 | case EHPersonality::XL_CXX: |
| 4544 | case EHPersonality::ZOS_CXX: |
| 4545 | return TypeInfo->isNullValue(); |
| 4546 | } |
| 4547 | llvm_unreachable("invalid enum" ); |
| 4548 | } |
| 4549 | |
| 4550 | static bool shorter_filter(const Value *LHS, const Value *RHS) { |
| 4551 | return |
| 4552 | cast<ArrayType>(Val: LHS->getType())->getNumElements() |
| 4553 | < |
| 4554 | cast<ArrayType>(Val: RHS->getType())->getNumElements(); |
| 4555 | } |
| 4556 | |
| 4557 | Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { |
| 4558 | // The logic here should be correct for any real-world personality function. |
| 4559 | // However if that turns out not to be true, the offending logic can always |
| 4560 | // be conditioned on the personality function, like the catch-all logic is. |
| 4561 | EHPersonality Personality = |
| 4562 | classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn()); |
| 4563 | |
// Simplify the list of clauses, e.g., by removing repeated catch clauses
| 4565 | // (these are often created by inlining). |
| 4566 | bool MakeNewInstruction = false; // If true, recreate using the following: |
| 4567 | SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; |
| 4568 | bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. |
| 4569 | |
| 4570 | SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. |
| 4571 | for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { |
| 4572 | bool isLastClause = i + 1 == e; |
| 4573 | if (LI.isCatch(Idx: i)) { |
| 4574 | // A catch clause. |
| 4575 | Constant *CatchClause = LI.getClause(Idx: i); |
| 4576 | Constant *TypeInfo = CatchClause->stripPointerCasts(); |
| 4577 | |
| 4578 | // If we already saw this clause, there is no point in having a second |
| 4579 | // copy of it. |
| 4580 | if (AlreadyCaught.insert(Ptr: TypeInfo).second) { |
| 4581 | // This catch clause was not already seen. |
| 4582 | NewClauses.push_back(Elt: CatchClause); |
| 4583 | } else { |
| 4584 | // Repeated catch clause - drop the redundant copy. |
| 4585 | MakeNewInstruction = true; |
| 4586 | } |
| 4587 | |
| 4588 | // If this is a catch-all then there is no point in keeping any following |
| 4589 | // clauses or marking the landingpad as having a cleanup. |
| 4590 | if (isCatchAll(Personality, TypeInfo)) { |
| 4591 | if (!isLastClause) |
| 4592 | MakeNewInstruction = true; |
| 4593 | CleanupFlag = false; |
| 4594 | break; |
| 4595 | } |
| 4596 | } else { |
| 4597 | // A filter clause. If any of the filter elements were already caught |
| 4598 | // then they can be dropped from the filter. It is tempting to try to |
| 4599 | // exploit the filter further by saying that any typeinfo that does not |
| 4600 | // occur in the filter can't be caught later (and thus can be dropped). |
| 4601 | // However this would be wrong, since typeinfos can match without being |
| 4602 | // equal (for example if one represents a C++ class, and the other some |
| 4603 | // class derived from it). |
| 4604 | assert(LI.isFilter(i) && "Unsupported landingpad clause!" ); |
| 4605 | Constant *FilterClause = LI.getClause(Idx: i); |
| 4606 | ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType()); |
| 4607 | unsigned NumTypeInfos = FilterType->getNumElements(); |
| 4608 | |
| 4609 | // An empty filter catches everything, so there is no point in keeping any |
| 4610 | // following clauses or marking the landingpad as having a cleanup. By |
| 4611 | // dealing with this case here the following code is made a bit simpler. |
| 4612 | if (!NumTypeInfos) { |
| 4613 | NewClauses.push_back(Elt: FilterClause); |
| 4614 | if (!isLastClause) |
| 4615 | MakeNewInstruction = true; |
| 4616 | CleanupFlag = false; |
| 4617 | break; |
| 4618 | } |
| 4619 | |
| 4620 | bool MakeNewFilter = false; // If true, make a new filter. |
| 4621 | SmallVector<Constant *, 16> NewFilterElts; // New elements. |
| 4622 | if (isa<ConstantAggregateZero>(Val: FilterClause)) { |
| 4623 | // Not an empty filter - it contains at least one null typeinfo. |
| 4624 | assert(NumTypeInfos > 0 && "Should have handled empty filter already!" ); |
| 4625 | Constant *TypeInfo = |
| 4626 | Constant::getNullValue(Ty: FilterType->getElementType()); |
| 4627 | // If this typeinfo is a catch-all then the filter can never match. |
| 4628 | if (isCatchAll(Personality, TypeInfo)) { |
| 4629 | // Throw the filter away. |
| 4630 | MakeNewInstruction = true; |
| 4631 | continue; |
| 4632 | } |
| 4633 | |
| 4634 | // There is no point in having multiple copies of this typeinfo, so |
| 4635 | // discard all but the first copy if there is more than one. |
| 4636 | NewFilterElts.push_back(Elt: TypeInfo); |
| 4637 | if (NumTypeInfos > 1) |
| 4638 | MakeNewFilter = true; |
| 4639 | } else { |
| 4640 | ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause); |
| 4641 | SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. |
| 4642 | NewFilterElts.reserve(N: NumTypeInfos); |
| 4643 | |
| 4644 | // Remove any filter elements that were already caught or that already |
| 4645 | // occurred in the filter. While there, see if any of the elements are |
| 4646 | // catch-alls. If so, the filter can be discarded. |
| 4647 | bool SawCatchAll = false; |
| 4648 | for (unsigned j = 0; j != NumTypeInfos; ++j) { |
| 4649 | Constant *Elt = Filter->getOperand(i_nocapture: j); |
| 4650 | Constant *TypeInfo = Elt->stripPointerCasts(); |
| 4651 | if (isCatchAll(Personality, TypeInfo)) { |
| 4652 | // This element is a catch-all. Bail out, noting this fact. |
| 4653 | SawCatchAll = true; |
| 4654 | break; |
| 4655 | } |
| 4656 | |
| 4657 | // Even if we've seen a type in a catch clause, we don't want to |
| 4658 | // remove it from the filter. An unexpected type handler may be |
| 4659 | // set up for a call site which throws an exception of the same |
| 4660 | // type caught. In order for the exception thrown by the unexpected |
| 4661 | // handler to propagate correctly, the filter must be correctly |
| 4662 | // described for the call site. |
| 4663 | // |
| 4664 | // Example: |
| 4665 | // |
| 4666 | // void unexpected() { throw 1;} |
| 4667 | // void foo() throw (int) { |
| 4668 | // std::set_unexpected(unexpected); |
| 4669 | // try { |
| 4670 | // throw 2.0; |
| 4671 | // } catch (int i) {} |
| 4672 | // } |
| 4673 | |
| 4674 | // There is no point in having multiple copies of the same typeinfo in |
| 4675 | // a filter, so only add it if we didn't already. |
| 4676 | if (SeenInFilter.insert(Ptr: TypeInfo).second) |
| 4677 | NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt)); |
| 4678 | } |
| 4679 | // A filter containing a catch-all cannot match anything by definition. |
| 4680 | if (SawCatchAll) { |
| 4681 | // Throw the filter away. |
| 4682 | MakeNewInstruction = true; |
| 4683 | continue; |
| 4684 | } |
| 4685 | |
| 4686 | // If we dropped something from the filter, make a new one. |
| 4687 | if (NewFilterElts.size() < NumTypeInfos) |
| 4688 | MakeNewFilter = true; |
| 4689 | } |
| 4690 | if (MakeNewFilter) { |
| 4691 | FilterType = ArrayType::get(ElementType: FilterType->getElementType(), |
| 4692 | NumElements: NewFilterElts.size()); |
| 4693 | FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts); |
| 4694 | MakeNewInstruction = true; |
| 4695 | } |
| 4696 | |
| 4697 | NewClauses.push_back(Elt: FilterClause); |
| 4698 | |
| 4699 | // If the new filter is empty then it will catch everything so there is |
| 4700 | // no point in keeping any following clauses or marking the landingpad |
| 4701 | // as having a cleanup. The case of the original filter being empty was |
| 4702 | // already handled above. |
| 4703 | if (MakeNewFilter && !NewFilterElts.size()) { |
| 4704 | assert(MakeNewInstruction && "New filter but not a new instruction!" ); |
| 4705 | CleanupFlag = false; |
| 4706 | break; |
| 4707 | } |
| 4708 | } |
| 4709 | } |
| 4710 | |
| 4711 | // If several filters occur in a row then reorder them so that the shortest |
| 4712 | // filters come first (those with the smallest number of elements). This is |
| 4713 | // advantageous because shorter filters are more likely to match, speeding up |
| 4714 | // unwinding, but mostly because it increases the effectiveness of the other |
| 4715 | // filter optimizations below. |
| 4716 | for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { |
| 4717 | unsigned j; |
| 4718 | // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. |
| 4719 | for (j = i; j != e; ++j) |
| 4720 | if (!isa<ArrayType>(Val: NewClauses[j]->getType())) |
| 4721 | break; |
| 4722 | |
| 4723 | // Check whether the filters are already sorted by length. We need to know |
| 4724 | // if sorting them is actually going to do anything so that we only make a |
| 4725 | // new landingpad instruction if it does. |
| 4726 | for (unsigned k = i; k + 1 < j; ++k) |
| 4727 | if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) { |
| 4728 | // Not sorted, so sort the filters now. Doing an unstable sort would be |
| 4729 | // correct too but reordering filters pointlessly might confuse users. |
| 4730 | std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j, |
| 4731 | comp: shorter_filter); |
| 4732 | MakeNewInstruction = true; |
| 4733 | break; |
| 4734 | } |
| 4735 | |
| 4736 | // Look for the next batch of filters. |
| 4737 | i = j + 1; |
| 4738 | } |
| 4739 | |
| 4740 | // If typeinfos matched if and only if equal, then the elements of a filter L |
| 4741 | // that occurs later than a filter F could be replaced by the intersection of |
| 4742 | // the elements of F and L. In reality two typeinfos can match without being |
| 4743 | // equal (for example if one represents a C++ class, and the other some class |
| 4744 | // derived from it) so it would be wrong to perform this transform in general. |
| 4745 | // However the transform is correct and useful if F is a subset of L. In that |
| 4746 | // case L can be replaced by F, and thus removed altogether since repeating a |
| 4747 | // filter is pointless. So here we look at all pairs of filters F and L where |
| 4748 | // L follows F in the list of clauses, and remove L if every element of F is |
| 4749 | // an element of L. This can occur when inlining C++ functions with exception |
| 4750 | // specifications. |
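//
// For example (illustrative typeinfo names), in a clause list like:
//   filter [1 x ptr] [ptr @ti_A]
//   filter [2 x ptr] [ptr @ti_A, ptr @ti_B]
// the first filter's elements are a subset of the second's, so the second
// filter is removed.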
| 4751 | for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { |
| 4752 | // Examine each filter in turn. |
| 4753 | Value *Filter = NewClauses[i]; |
| 4754 | ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType()); |
| 4755 | if (!FTy) |
| 4756 | // Not a filter - skip it. |
| 4757 | continue; |
| 4758 | unsigned FElts = FTy->getNumElements(); |
| 4759 | // Examine each filter following this one. Doing this backwards means that |
| 4760 | // we don't have to worry about filters disappearing under us when removed. |
| 4761 | for (unsigned j = NewClauses.size() - 1; j != i; --j) { |
| 4762 | Value *LFilter = NewClauses[j]; |
| 4763 | ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType()); |
| 4764 | if (!LTy) |
| 4765 | // Not a filter - skip it. |
| 4766 | continue; |
| 4767 | // If Filter is a subset of LFilter, i.e. every element of Filter is also |
| 4768 | // an element of LFilter, then discard LFilter. |
| 4769 | SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; |
| 4770 | // If Filter is empty then it is a subset of LFilter. |
| 4771 | if (!FElts) { |
| 4772 | // Discard LFilter. |
| 4773 | NewClauses.erase(CI: J); |
| 4774 | MakeNewInstruction = true; |
| 4775 | // Move on to the next filter. |
| 4776 | continue; |
| 4777 | } |
| 4778 | unsigned LElts = LTy->getNumElements(); |
| 4779 | // If Filter is longer than LFilter then it cannot be a subset of it. |
| 4780 | if (FElts > LElts) |
| 4781 | // Move on to the next filter. |
| 4782 | continue; |
| 4783 | // At this point we know that LFilter has at least one element. |
| 4784 | if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros. |
| 4785 | // Filter is a subset of LFilter iff Filter contains only zeros (as we |
| 4786 | // already know that Filter is not longer than LFilter). |
| 4787 | if (isa<ConstantAggregateZero>(Val: Filter)) { |
| 4788 | assert(FElts <= LElts && "Should have handled this case earlier!" ); |
| 4789 | // Discard LFilter. |
| 4790 | NewClauses.erase(CI: J); |
| 4791 | MakeNewInstruction = true; |
| 4792 | } |
| 4793 | // Move on to the next filter. |
| 4794 | continue; |
| 4795 | } |
| 4796 | ConstantArray *LArray = cast<ConstantArray>(Val: LFilter); |
| 4797 | if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros. |
| 4798 | // Since Filter is non-empty and contains only zeros, it is a subset of |
| 4799 | // LFilter iff LFilter contains a zero. |
| 4800 | assert(FElts > 0 && "Should have eliminated the empty filter earlier!" ); |
| 4801 | for (unsigned l = 0; l != LElts; ++l) |
| 4802 | if (LArray->getOperand(i_nocapture: l)->isNullValue()) { |
| 4803 | // LFilter contains a zero - discard it. |
| 4804 | NewClauses.erase(CI: J); |
| 4805 | MakeNewInstruction = true; |
| 4806 | break; |
| 4807 | } |
| 4808 | // Move on to the next filter. |
| 4809 | continue; |
| 4810 | } |
| 4811 | // At this point we know that both filters are ConstantArrays. Loop over |
| 4812 | // operands to see whether every element of Filter is also an element of |
| 4813 | // LFilter. Since filters tend to be short this is probably faster than |
| 4814 | // using a method that scales nicely. |
| 4815 | ConstantArray *FArray = cast<ConstantArray>(Val: Filter); |
| 4816 | bool AllFound = true; |
| 4817 | for (unsigned f = 0; f != FElts; ++f) { |
| 4818 | Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts(); |
| 4819 | AllFound = false; |
| 4820 | for (unsigned l = 0; l != LElts; ++l) { |
| 4821 | Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts(); |
| 4822 | if (LTypeInfo == FTypeInfo) { |
| 4823 | AllFound = true; |
| 4824 | break; |
| 4825 | } |
| 4826 | } |
| 4827 | if (!AllFound) |
| 4828 | break; |
| 4829 | } |
| 4830 | if (AllFound) { |
| 4831 | // Discard LFilter. |
| 4832 | NewClauses.erase(CI: J); |
| 4833 | MakeNewInstruction = true; |
| 4834 | } |
| 4835 | // Move on to the next filter. |
| 4836 | } |
| 4837 | } |
| 4838 | |
| 4839 | // If we changed any of the clauses, replace the old landingpad instruction |
| 4840 | // with a new one. |
| 4841 | if (MakeNewInstruction) { |
| 4842 | LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(), |
| 4843 | NumReservedClauses: NewClauses.size()); |
| 4844 | for (Constant *C : NewClauses) |
| 4845 | NLI->addClause(ClauseVal: C); |
| 4846 | // A landing pad with no clauses must have the cleanup flag set. It is |
| 4847 | // theoretically possible, though highly unlikely, that we eliminated all |
| 4848 | // clauses. If so, force the cleanup flag to true. |
| 4849 | if (NewClauses.empty()) |
| 4850 | CleanupFlag = true; |
| 4851 | NLI->setCleanup(CleanupFlag); |
| 4852 | return NLI; |
| 4853 | } |
| 4854 | |
| 4855 | // Even if none of the clauses changed, we may nonetheless have understood |
| 4856 | // that the cleanup flag is pointless. Clear it if so. |
| 4857 | if (LI.isCleanup() != CleanupFlag) { |
| 4858 | assert(!CleanupFlag && "Adding a cleanup, not removing one?!" ); |
| 4859 | LI.setCleanup(CleanupFlag); |
| 4860 | return &LI; |
| 4861 | } |
| 4862 | |
| 4863 | return nullptr; |
| 4864 | } |
| 4865 | |
| 4866 | Value * |
| 4867 | InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { |
| 4868 | // Try to push freeze through instructions that propagate but don't produce |
// poison as far as possible. If an operand of the freeze satisfies three
// conditions: 1) it has one use, 2) it does not produce poison, and 3) all
// but one of its operands are guaranteed non-poison, then push the freeze
// through to the single operand that is not guaranteed non-poison. The
// actual transform is as follows.
// Op1 = ... ; Op1 can be poison
// Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use, and all of its
//                                  ; operands other than Op1 are guaranteed
//                                  ; non-poison
| 4877 | // ... = Freeze(Op0) |
| 4878 | // => |
| 4879 | // Op1 = ... |
| 4880 | // Op1.fr = Freeze(Op1) |
| 4881 | // ... = Inst(Op1.fr, NonPoisonOps...) |
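// For example, in IR (the constant 1 is guaranteed non-poison, so %op1 is the
// single maybe-poison operand):
//   %op0 = add nsw i32 %op1, 1
//   %fr = freeze i32 %op0
// =>
//   %op1.fr = freeze i32 %op1
//   %op0 = add i32 %op1.fr, 1   ; nsw is dropped since only the freeze used %op0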
| 4882 | auto *OrigOp = OrigFI.getOperand(i_nocapture: 0); |
| 4883 | auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp); |
| 4884 | |
| 4885 | // While we could change the other users of OrigOp to use freeze(OrigOp), that |
| 4886 | // potentially reduces their optimization potential, so let's only do this iff |
| 4887 | // the OrigOp is only used by the freeze. |
| 4888 | if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp)) |
| 4889 | return nullptr; |
| 4890 | |
| 4891 | // We can't push the freeze through an instruction which can itself create |
| 4892 | // poison. If the only source of new poison is flags, we can simply |
| 4893 | // strip them (since we know the only use is the freeze and nothing can |
// benefit from them).
| 4895 | if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp), |
| 4896 | /*ConsiderFlagsAndMetadata*/ false)) |
| 4897 | return nullptr; |
| 4898 | |
// If an operand is guaranteed not to be poison, there is no need to freeze
// it. So we first find the single operand that is not guaranteed to be
// poison.
| 4902 | Use *MaybePoisonOperand = nullptr; |
| 4903 | for (Use &U : OrigOpInst->operands()) { |
| 4904 | if (isa<MetadataAsValue>(Val: U.get()) || |
| 4905 | isGuaranteedNotToBeUndefOrPoison(V: U.get())) |
| 4906 | continue; |
| 4907 | if (!MaybePoisonOperand) |
| 4908 | MaybePoisonOperand = &U; |
| 4909 | else |
| 4910 | return nullptr; |
| 4911 | } |
| 4912 | |
| 4913 | OrigOpInst->dropPoisonGeneratingAnnotations(); |
| 4914 | |
| 4915 | // If all operands are guaranteed to be non-poison, we can drop freeze. |
| 4916 | if (!MaybePoisonOperand) |
| 4917 | return OrigOp; |
| 4918 | |
| 4919 | Builder.SetInsertPoint(OrigOpInst); |
| 4920 | auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( |
V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr");
| 4922 | |
| 4923 | replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand); |
| 4924 | return OrigOp; |
| 4925 | } |
| 4926 | |
| 4927 | Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, |
| 4928 | PHINode *PN) { |
| 4929 | // Detect whether this is a recurrence with a start value and some number of |
| 4930 | // backedge values. We'll check whether we can push the freeze through the |
| 4931 | // backedge values (possibly dropping poison flags along the way) until we |
| 4932 | // reach the phi again. In that case, we can move the freeze to the start |
| 4933 | // value. |
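// For example, for a simple induction variable:
//   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nuw i32 %iv, 1
//   %fr = freeze i32 %iv
// the nuw flag is dropped from the add, %start is frozen in %entry (if it is
// not already known to be non-poison), and the freeze of %iv is replaced by
// %iv itself.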
| 4934 | Use *StartU = nullptr; |
| 4935 | SmallVector<Value *> Worklist; |
| 4936 | for (Use &U : PN->incoming_values()) { |
| 4937 | if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) { |
| 4938 | // Add backedge value to worklist. |
| 4939 | Worklist.push_back(Elt: U.get()); |
| 4940 | continue; |
| 4941 | } |
| 4942 | |
| 4943 | // Don't bother handling multiple start values. |
| 4944 | if (StartU) |
| 4945 | return nullptr; |
| 4946 | StartU = &U; |
| 4947 | } |
| 4948 | |
| 4949 | if (!StartU || Worklist.empty()) |
| 4950 | return nullptr; // Not a recurrence. |
| 4951 | |
| 4952 | Value *StartV = StartU->get(); |
| 4953 | BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU); |
| 4954 | bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV); |
| 4955 | // We can't insert freeze if the start value is the result of the |
| 4956 | // terminator (e.g. an invoke). |
| 4957 | if (StartNeedsFreeze && StartBB->getTerminator() == StartV) |
| 4958 | return nullptr; |
| 4959 | |
| 4960 | SmallPtrSet<Value *, 32> Visited; |
| 4961 | SmallVector<Instruction *> DropFlags; |
| 4962 | while (!Worklist.empty()) { |
| 4963 | Value *V = Worklist.pop_back_val(); |
| 4964 | if (!Visited.insert(Ptr: V).second) |
| 4965 | continue; |
| 4966 | |
| 4967 | if (Visited.size() > 32) |
| 4968 | return nullptr; // Limit the total number of values we inspect. |
| 4969 | |
| 4970 | // Assume that PN is non-poison, because it will be after the transform. |
| 4971 | if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
| 4972 | continue; |
| 4973 | |
| 4974 | Instruction *I = dyn_cast<Instruction>(Val: V); |
| 4975 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
| 4976 | /*ConsiderFlagsAndMetadata*/ false)) |
| 4977 | return nullptr; |
| 4978 | |
| 4979 | DropFlags.push_back(Elt: I); |
| 4980 | append_range(C&: Worklist, R: I->operands()); |
| 4981 | } |
| 4982 | |
| 4983 | for (Instruction *I : DropFlags) |
| 4984 | I->dropPoisonGeneratingAnnotations(); |
| 4985 | |
| 4986 | if (StartNeedsFreeze) { |
| 4987 | Builder.SetInsertPoint(StartBB->getTerminator()); |
| 4988 | Value *FrozenStartV = Builder.CreateFreeze(V: StartV, |
Name: StartV->getName() + ".fr");
| 4990 | replaceUse(U&: *StartU, NewValue: FrozenStartV); |
| 4991 | } |
| 4992 | return replaceInstUsesWith(I&: FI, V: PN); |
| 4993 | } |
| 4994 | |
| 4995 | bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { |
| 4996 | Value *Op = FI.getOperand(i_nocapture: 0); |
| 4997 | |
| 4998 | if (isa<Constant>(Val: Op) || Op->hasOneUse()) |
| 4999 | return false; |
| 5000 | |
| 5001 | // Move the freeze directly after the definition of its operand, so that |
| 5002 | // it dominates the maximum number of uses. Note that it may not dominate |
| 5003 | // *all* uses if the operand is an invoke/callbr and the use is in a phi on |
| 5004 | // the normal/default destination. This is why the domination check in the |
| 5005 | // replacement below is still necessary. |
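// For example, if Op is an invoke, the freeze is inserted at the start of the
// invoke's normal destination; a phi in that block which uses Op along the
// invoke edge is not dominated by the freeze and keeps using Op directly.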
| 5006 | BasicBlock::iterator MoveBefore; |
| 5007 | if (isa<Argument>(Val: Op)) { |
| 5008 | MoveBefore = |
| 5009 | FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); |
| 5010 | } else { |
| 5011 | auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef(); |
| 5012 | if (!MoveBeforeOpt) |
| 5013 | return false; |
| 5014 | MoveBefore = *MoveBeforeOpt; |
| 5015 | } |
| 5016 | |
| 5017 | // Re-point iterator to come after any debug-info records. |
| 5018 | MoveBefore.setHeadBit(false); |
| 5019 | |
| 5020 | bool Changed = false; |
| 5021 | if (&FI != &*MoveBefore) { |
| 5022 | FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore); |
| 5023 | Changed = true; |
| 5024 | } |
| 5025 | |
| 5026 | Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool { |
| 5027 | bool Dominates = DT.dominates(Def: &FI, U); |
| 5028 | Changed |= Dominates; |
| 5029 | return Dominates; |
| 5030 | }); |
| 5031 | |
| 5032 | return Changed; |
| 5033 | } |
| 5034 | |
| 5035 | // Check if any direct or bitcast user of this value is a shuffle instruction. |
| 5036 | static bool isUsedWithinShuffleVector(Value *V) { |
| 5037 | for (auto *U : V->users()) { |
| 5038 | if (isa<ShuffleVectorInst>(Val: U)) |
| 5039 | return true; |
| 5040 | else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U)) |
| 5041 | return true; |
| 5042 | } |
| 5043 | return false; |
| 5044 | } |
| 5045 | |
| 5046 | Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { |
| 5047 | Value *Op0 = I.getOperand(i_nocapture: 0); |
| 5048 | |
| 5049 | if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I))) |
| 5050 | return replaceInstUsesWith(I, V); |
| 5051 | |
| 5052 | // freeze (phi const, x) --> phi const, (freeze x) |
| 5053 | if (auto *PN = dyn_cast<PHINode>(Val: Op0)) { |
| 5054 | if (Instruction *NV = foldOpIntoPhi(I, PN)) |
| 5055 | return NV; |
| 5056 | if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN)) |
| 5057 | return NV; |
| 5058 | } |
| 5059 | |
| 5060 | if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I)) |
| 5061 | return replaceInstUsesWith(I, V: NI); |
| 5062 | |
| 5063 | // If I is freeze(undef), check its uses and fold it to a fixed constant. |
| 5064 | // - or: pick -1 |
| 5065 | // - select's condition: if the true value is constant, choose it by making |
| 5066 | // the condition true. |
| 5067 | // - default: pick 0 |
| 5068 | // |
| 5069 | // Note that this transform is intentionally done here rather than |
| 5070 | // via an analysis in InstSimplify or at individual user sites. That is |
| 5071 | // because we must produce the same value for all uses of the freeze - |
| 5072 | // it's the reason "freeze" exists! |
| 5073 | // |
| 5074 | // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid |
| 5075 | // duplicating logic for binops at least. |
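// For example, if the frozen undef's only user is "or i32 %f, %x", picking -1
// lets the 'or' fold to -1. If different users prefer different constants, we
// fall back to zero so that every use still observes the same value.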
| 5076 | auto getUndefReplacement = [&](Type *Ty) { |
| 5077 | Value *BestValue = nullptr; |
| 5078 | Value *NullValue = Constant::getNullValue(Ty); |
| 5079 | for (const auto *U : I.users()) { |
| 5080 | Value *V = NullValue; |
| 5081 | if (match(V: U, P: m_Or(L: m_Value(), R: m_Value()))) |
| 5082 | V = ConstantInt::getAllOnesValue(Ty); |
| 5083 | else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value()))) |
| 5084 | V = ConstantInt::getTrue(Ty); |
| 5085 | else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) { |
| 5086 | if (!isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT)) |
| 5087 | V = NullValue; |
| 5088 | } |
| 5089 | |
| 5090 | if (!BestValue) |
| 5091 | BestValue = V; |
| 5092 | else if (BestValue != V) |
| 5093 | BestValue = NullValue; |
| 5094 | } |
assert(BestValue && "Must have at least one use");
| 5096 | return BestValue; |
| 5097 | }; |
| 5098 | |
| 5099 | if (match(V: Op0, P: m_Undef())) { |
| 5100 | // Don't fold freeze(undef/poison) if it's used as a vector operand in |
| 5101 | // a shuffle. This may improve codegen for shuffles that allow |
| 5102 | // unspecified inputs. |
| 5103 | if (isUsedWithinShuffleVector(V: &I)) |
| 5104 | return nullptr; |
| 5105 | return replaceInstUsesWith(I, V: getUndefReplacement(I.getType())); |
| 5106 | } |
| 5107 | |
| 5108 | auto getFreezeVectorReplacement = [](Constant *C) -> Constant * { |
| 5109 | Type *Ty = C->getType(); |
| 5110 | auto *VTy = dyn_cast<FixedVectorType>(Val: Ty); |
| 5111 | if (!VTy) |
| 5112 | return nullptr; |
| 5113 | unsigned NumElts = VTy->getNumElements(); |
| 5114 | Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType()); |
| 5115 | for (unsigned i = 0; i != NumElts; ++i) { |
| 5116 | Constant *EltC = C->getAggregateElement(Elt: i); |
| 5117 | if (EltC && !match(V: EltC, P: m_Undef())) { |
| 5118 | BestValue = EltC; |
| 5119 | break; |
| 5120 | } |
| 5121 | } |
| 5122 | return Constant::replaceUndefsWith(C, Replacement: BestValue); |
| 5123 | }; |
| 5124 | |
| 5125 | Constant *C; |
| 5126 | if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() && |
| 5127 | !C->containsConstantExpression()) { |
| 5128 | if (Constant *Repl = getFreezeVectorReplacement(C)) |
| 5129 | return replaceInstUsesWith(I, V: Repl); |
| 5130 | } |
| 5131 | |
| 5132 | // Replace uses of Op with freeze(Op). |
| 5133 | if (freezeOtherUses(FI&: I)) |
| 5134 | return &I; |
| 5135 | |
| 5136 | return nullptr; |
| 5137 | } |
| 5138 | |
/// Check for the case where the call writes to an otherwise dead alloca. This
/// shows up for unused out-params in idiomatic C/C++ code. Note that this
/// helper *only* analyzes the write; it doesn't check any other legality
/// aspect.
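/// For example, the destination of this memset is an otherwise dead local, so
/// the write is unobservable:
///   %unused = alloca i32
///   call void @llvm.memset.p0.i64(ptr %unused, i8 0, i64 4, i1 false)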
| 5142 | static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { |
| 5143 | auto *CB = dyn_cast<CallBase>(Val: I); |
| 5144 | if (!CB) |
| 5145 | // TODO: handle e.g. store to alloca here - only worth doing if we extend |
| 5146 | // to allow reload along used path as described below. Otherwise, this |
| 5147 | // is simply a store to a dead allocation which will be removed. |
| 5148 | return false; |
| 5149 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI); |
| 5150 | if (!Dest) |
| 5151 | return false; |
| 5152 | auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr)); |
| 5153 | if (!AI) |
| 5154 | // TODO: allow malloc? |
| 5155 | return false; |
| 5156 | // TODO: allow memory access dominated by move point? Note that since AI |
| 5157 | // could have a reference to itself captured by the call, we would need to |
| 5158 | // account for cycles in doing so. |
| 5159 | SmallVector<const User *> AllocaUsers; |
| 5160 | SmallPtrSet<const User *, 4> Visited; |
| 5161 | auto pushUsers = [&](const Instruction &I) { |
| 5162 | for (const User *U : I.users()) { |
| 5163 | if (Visited.insert(Ptr: U).second) |
| 5164 | AllocaUsers.push_back(Elt: U); |
| 5165 | } |
| 5166 | }; |
| 5167 | pushUsers(*AI); |
| 5168 | while (!AllocaUsers.empty()) { |
| 5169 | auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val()); |
| 5170 | if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) { |
| 5171 | pushUsers(*UserI); |
| 5172 | continue; |
| 5173 | } |
| 5174 | if (UserI == CB) |
| 5175 | continue; |
| 5176 | // TODO: support lifetime.start/end here |
| 5177 | return false; |
| 5178 | } |
| 5179 | return true; |
| 5180 | } |
| 5181 | |
| 5182 | /// Try to move the specified instruction from its current block into the |
| 5183 | /// beginning of DestBlock, which can only happen if it's safe to move the |
| 5184 | /// instruction past all of the instructions between it and the end of its |
| 5185 | /// block. |
| 5186 | bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, |
| 5187 | BasicBlock *DestBlock) { |
| 5188 | BasicBlock *SrcBlock = I->getParent(); |
| 5189 | |
// Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
| 5191 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
| 5192 | I->isTerminator()) |
| 5193 | return false; |
| 5194 | |
| 5195 | // Do not sink static or dynamic alloca instructions. Static allocas must |
| 5196 | // remain in the entry block, and dynamic allocas must not be sunk in between |
// a stacksave / stackrestore pair, which would incorrectly shorten their
| 5198 | // lifetime. |
| 5199 | if (isa<AllocaInst>(Val: I)) |
| 5200 | return false; |
| 5201 | |
| 5202 | // Do not sink into catchswitch blocks. |
| 5203 | if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator())) |
| 5204 | return false; |
| 5205 | |
| 5206 | // Do not sink convergent call instructions. |
| 5207 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
| 5208 | if (CI->isConvergent()) |
| 5209 | return false; |
| 5210 | } |
| 5211 | |
// Unless we can prove that the memory write isn't visible except on the
| 5213 | // path we're sinking to, we must bail. |
| 5214 | if (I->mayWriteToMemory()) { |
| 5215 | if (!SoleWriteToDeadLocal(I, TLI)) |
| 5216 | return false; |
| 5217 | } |
| 5218 | |
// We can only sink load instructions if there is nothing between the load and
// the end of the block that could change the loaded value.
| 5221 | if (I->mayReadFromMemory() && |
| 5222 | !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) { |
| 5223 | // We don't want to do any sophisticated alias analysis, so we only check |
| 5224 | // the instructions after I in I's parent block if we try to sink to its |
| 5225 | // successor block. |
| 5226 | if (DestBlock->getUniquePredecessor() != I->getParent()) |
| 5227 | return false; |
| 5228 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
| 5229 | E = I->getParent()->end(); |
| 5230 | Scan != E; ++Scan) |
| 5231 | if (Scan->mayWriteToMemory()) |
| 5232 | return false; |
| 5233 | } |
| 5234 | |
| 5235 | I->dropDroppableUses(ShouldDrop: [&](const Use *U) { |
| 5236 | auto *I = dyn_cast<Instruction>(Val: U->getUser()); |
| 5237 | if (I && I->getParent() != DestBlock) { |
| 5238 | Worklist.add(I); |
| 5239 | return true; |
| 5240 | } |
| 5241 | return false; |
| 5242 | }); |
| 5243 | /// FIXME: We could remove droppable uses that are not dominated by |
| 5244 | /// the new position. |
| 5245 | |
| 5246 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
| 5247 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
| 5248 | ++NumSunkInst; |
| 5249 | |
| 5250 | // Also sink all related debug uses from the source basic block. Otherwise we |
// get debug use before the def. Attempt to salvage debug uses first, to
// maximise the range over which variables have a location. If we cannot
// salvage, then mark the location undef: we know it was supposed to receive a
// new location
| 5254 | // here, but that computation has been sunk. |
| 5255 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsers; |
| 5256 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecords; |
| 5257 | findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords); |
| 5258 | if (!DbgUsers.empty()) |
| 5259 | tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers); |
| 5260 | if (!DbgVariableRecords.empty()) |
| 5261 | tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock, |
| 5262 | DPUsers&: DbgVariableRecords); |
| 5263 | |
// PS: there are numerous flaws with this behaviour, not least that right now
// assignments can be re-ordered past other assignments to the same variable
// if they use different Values. Creating more undef assignments can never be
// undone. And salvaging all users outside of this block can unnecessarily
// alter the lifetime of the live value that the variable refers to.
// Some of these things could be resolved by tolerating debug use-before-defs
// in LLVM-IR; however, that depends on the instruction-referencing CodeGen
// backend being used for more architectures.
| 5272 | |
| 5273 | return true; |
| 5274 | } |
| 5275 | |
| 5276 | void InstCombinerImpl::tryToSinkInstructionDbgValues( |
| 5277 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
| 5278 | BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) { |
| 5279 | // For all debug values in the destination block, the sunk instruction |
| 5280 | // will still be available, so they do not need to be dropped. |
| 5281 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage; |
| 5282 | for (auto &DbgUser : DbgUsers) |
| 5283 | if (DbgUser->getParent() != DestBlock) |
| 5284 | DbgUsersToSalvage.push_back(Elt: DbgUser); |
| 5285 | |
| 5286 | // Process the sinking DbgUsersToSalvage in reverse order, as we only want |
| 5287 | // to clone the last appearing debug intrinsic for each given variable. |
| 5288 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink; |
| 5289 | for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) |
| 5290 | if (DVI->getParent() == SrcBlock) |
| 5291 | DbgUsersToSink.push_back(Elt: DVI); |
| 5292 | llvm::sort(C&: DbgUsersToSink, |
| 5293 | Comp: [](auto *A, auto *B) { return B->comesBefore(A); }); |
| 5294 | |
| 5295 | SmallVector<DbgVariableIntrinsic *, 2> DIIClones; |
| 5296 | SmallSet<DebugVariable, 4> SunkVariables; |
| 5297 | for (auto *User : DbgUsersToSink) { |
| 5298 | // A dbg.declare instruction should not be cloned, since there can only be |
| 5299 | // one per variable fragment. It should be left in the original place |
| 5300 | // because the sunk instruction is not an alloca (otherwise we could not be |
| 5301 | // here). |
| 5302 | if (isa<DbgDeclareInst>(Val: User)) |
| 5303 | continue; |
| 5304 | |
| 5305 | DebugVariable DbgUserVariable = |
| 5306 | DebugVariable(User->getVariable(), User->getExpression(), |
| 5307 | User->getDebugLoc()->getInlinedAt()); |
| 5308 | |
| 5309 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
| 5310 | continue; |
| 5311 | |
// Leave dbg.assign intrinsics in their original positions; there should be
// no need to insert a clone.
| 5314 | if (isa<DbgAssignIntrinsic>(Val: User)) |
| 5315 | continue; |
| 5316 | |
| 5317 | DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone())); |
| 5318 | if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I)) |
| 5319 | DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0)); |
| 5320 | LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); |
| 5321 | } |
| 5322 | |
| 5323 | // Perform salvaging without the clones, then sink the clones. |
| 5324 | if (!DIIClones.empty()) { |
| 5325 | salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {}); |
// The clones are in reverse order of original appearance; reverse again to
// maintain the original order.
| 5328 | for (auto &DIIClone : llvm::reverse(C&: DIIClones)) { |
| 5329 | DIIClone->insertBefore(InsertPos); |
| 5330 | LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); |
| 5331 | } |
| 5332 | } |
| 5333 | } |
| 5334 | |
| 5335 | void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords( |
| 5336 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
| 5337 | BasicBlock *DestBlock, |
| 5338 | SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) { |
| 5339 | // Implementation of tryToSinkInstructionDbgValues, but for the |
| 5340 | // DbgVariableRecord of variable assignments rather than dbg.values. |
| 5341 | |
| 5342 | // Fetch all DbgVariableRecords not already in the destination. |
| 5343 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage; |
| 5344 | for (auto &DVR : DbgVariableRecords) |
| 5345 | if (DVR->getParent() != DestBlock) |
| 5346 | DbgVariableRecordsToSalvage.push_back(Elt: DVR); |
| 5347 | |
| 5348 | // Fetch a second collection, of DbgVariableRecords in the source block that |
| 5349 | // we're going to sink. |
| 5350 | SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink; |
| 5351 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage) |
| 5352 | if (DVR->getParent() == SrcBlock) |
| 5353 | DbgVariableRecordsToSink.push_back(Elt: DVR); |
| 5354 | |
| 5355 | // Sort DbgVariableRecords according to their position in the block. This is a |
| 5356 | // partial order: DbgVariableRecords attached to different instructions will |
| 5357 | // be ordered by the instruction order, but DbgVariableRecords attached to the |
| 5358 | // same instruction won't have an order. |
| 5359 | auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool { |
| 5360 | return B->getInstruction()->comesBefore(Other: A->getInstruction()); |
| 5361 | }; |
| 5362 | llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order); |
| 5363 | |
| 5364 | // If there are two assignments to the same variable attached to the same |
| 5365 | // instruction, the ordering between the two assignments is important. Scan |
| 5366 | // for this (rare) case and establish which is the last assignment. |
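// For example, if two dbg_value records for the same variable are attached to
// the same instruction, only the later of the two is cloned and sunk; the
// earlier one is filtered out below.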
| 5367 | using InstVarPair = std::pair<const Instruction *, DebugVariable>; |
| 5368 | SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap; |
| 5369 | if (DbgVariableRecordsToSink.size() > 1) { |
| 5370 | SmallDenseMap<InstVarPair, unsigned> CountMap; |
| 5371 | // Count how many assignments to each variable there is per instruction. |
| 5372 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
| 5373 | DebugVariable DbgUserVariable = |
| 5374 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
| 5375 | DVR->getDebugLoc()->getInlinedAt()); |
| 5376 | CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1; |
| 5377 | } |
| 5378 | |
| 5379 | // If there are any instructions with two assignments, add them to the |
| 5380 | // FilterOutMap to record that they need extra filtering. |
| 5381 | SmallPtrSet<const Instruction *, 4> DupSet; |
| 5382 | for (auto It : CountMap) { |
| 5383 | if (It.second > 1) { |
| 5384 | FilterOutMap[It.first] = nullptr; |
| 5385 | DupSet.insert(Ptr: It.first.first); |
| 5386 | } |
| 5387 | } |
| 5388 | |
| 5389 | // For all instruction/variable pairs needing extra filtering, find the |
| 5390 | // latest assignment. |
| 5391 | for (const Instruction *Inst : DupSet) { |
| 5392 | for (DbgVariableRecord &DVR : |
| 5393 | llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) { |
| 5394 | DebugVariable DbgUserVariable = |
| 5395 | DebugVariable(DVR.getVariable(), DVR.getExpression(), |
| 5396 | DVR.getDebugLoc()->getInlinedAt()); |
| 5397 | auto FilterIt = |
| 5398 | FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable)); |
| 5399 | if (FilterIt == FilterOutMap.end()) |
| 5400 | continue; |
| 5401 | if (FilterIt->second != nullptr) |
| 5402 | continue; |
| 5403 | FilterIt->second = &DVR; |
| 5404 | } |
| 5405 | } |
| 5406 | } |
| 5407 | |
// Perform cloning of the DbgVariableRecords that we plan on sinking, filtering
// out any duplicate assignments identified above.
| 5410 | SmallVector<DbgVariableRecord *, 2> DVRClones; |
| 5411 | SmallSet<DebugVariable, 4> SunkVariables; |
| 5412 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
| 5413 | if (DVR->Type == DbgVariableRecord::LocationType::Declare) |
| 5414 | continue; |
| 5415 | |
| 5416 | DebugVariable DbgUserVariable = |
| 5417 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
| 5418 | DVR->getDebugLoc()->getInlinedAt()); |
| 5419 | |
| 5420 | // For any variable where there were multiple assignments in the same place, |
| 5421 | // ignore all but the last assignment. |
| 5422 | if (!FilterOutMap.empty()) { |
| 5423 | InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable); |
| 5424 | auto It = FilterOutMap.find(Val: IVP); |
| 5425 | |
| 5426 | // Filter out. |
| 5427 | if (It != FilterOutMap.end() && It->second != DVR) |
| 5428 | continue; |
| 5429 | } |
| 5430 | |
| 5431 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
| 5432 | continue; |
| 5433 | |
| 5434 | if (DVR->isDbgAssign()) |
| 5435 | continue; |
| 5436 | |
| 5437 | DVRClones.emplace_back(Args: DVR->clone()); |
| 5438 | LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n'); |
| 5439 | } |
| 5440 | |
| 5441 | // Perform salvaging without the clones, then sink the clones. |
| 5442 | if (DVRClones.empty()) |
| 5443 | return; |
| 5444 | |
| 5445 | salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage); |
| 5446 | |
| 5447 | // The clones are in reverse order of original appearance. Assert that the |
| 5448 | // head bit is set on the iterator as we _should_ have received it via |
| 5449 | // getFirstInsertionPt. Inserting like this will reverse the clone order as |
| 5450 | // we'll repeatedly insert at the head, such as: |
| 5451 | // DVR-3 (third insertion goes here) |
| 5452 | // DVR-2 (second insertion goes here) |
| 5453 | // DVR-1 (first insertion goes here) |
| 5454 | // Any-Prior-DVRs |
| 5455 | // InsertPtInst |
| 5456 | assert(InsertPos.getHeadBit()); |
| 5457 | for (DbgVariableRecord *DVRClone : DVRClones) { |
| 5458 | InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos); |
| 5459 | LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n'); |
| 5460 | } |
| 5461 | } |
| 5462 | |
| 5463 | bool InstCombinerImpl::run() { |
| 5464 | while (!Worklist.isEmpty()) { |
| 5465 | // Walk deferred instructions in reverse order, and push them to the |
| 5466 | // worklist, which means they'll end up popped from the worklist in-order. |
| 5467 | while (Instruction *I = Worklist.popDeferred()) { |
| 5468 | // Check to see if we can DCE the instruction. We do this already here to |
| 5469 | // reduce the number of uses and thus allow other folds to trigger. |
| 5470 | // Note that eraseInstFromFunction() may push additional instructions on |
| 5471 | // the deferred worklist, so this will DCE whole instruction chains. |
| 5472 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
| 5473 | eraseInstFromFunction(I&: *I); |
| 5474 | ++NumDeadInst; |
| 5475 | continue; |
| 5476 | } |
| 5477 | |
| 5478 | Worklist.push(I); |
| 5479 | } |
| 5480 | |
| 5481 | Instruction *I = Worklist.removeOne(); |
| 5482 | if (I == nullptr) continue; // skip null values. |
| 5483 | |
| 5484 | // Check to see if we can DCE the instruction. |
| 5485 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
| 5486 | eraseInstFromFunction(I&: *I); |
| 5487 | ++NumDeadInst; |
| 5488 | continue; |
| 5489 | } |
| 5490 | |
| 5491 | if (!DebugCounter::shouldExecute(CounterName: VisitCounter)) |
| 5492 | continue; |
| 5493 | |
| 5494 | // See if we can trivially sink this instruction to its user if we can |
| 5495 | // prove that the successor is not executed more frequently than our block. |
| 5496 | // Return the UserBlock if successful. |
| 5497 | auto getOptionalSinkBlockForInst = |
| 5498 | [this](Instruction *I) -> std::optional<BasicBlock *> { |
| 5499 | if (!EnableCodeSinking) |
| 5500 | return std::nullopt; |
| 5501 | |
| 5502 | BasicBlock *BB = I->getParent(); |
| 5503 | BasicBlock *UserParent = nullptr; |
| 5504 | unsigned NumUsers = 0; |
| 5505 | |
| 5506 | for (Use &U : I->uses()) { |
| 5507 | User *User = U.getUser(); |
| 5508 | if (User->isDroppable()) |
| 5509 | continue; |
| 5510 | if (NumUsers > MaxSinkNumUsers) |
| 5511 | return std::nullopt; |
| 5512 | |
| 5513 | Instruction *UserInst = cast<Instruction>(Val: User); |
| 5514 | // Special handling for Phi nodes - get the block the use occurs in. |
| 5515 | BasicBlock *UserBB = UserInst->getParent(); |
| 5516 | if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) |
| 5517 | UserBB = PN->getIncomingBlock(U); |
| 5518 | // Bail out if we have uses in different blocks. We don't do any |
// sophisticated analysis (i.e. finding the NearestCommonDominator of these
| 5520 | // use blocks). |
| 5521 | if (UserParent && UserParent != UserBB) |
| 5522 | return std::nullopt; |
| 5523 | UserParent = UserBB; |
| 5524 | |
// Make sure these checks are done only once: we do them the first time we
// find UserParent, which saves compile time.
| 5527 | if (NumUsers == 0) { |
| 5528 | // Try sinking to another block. If that block is unreachable, then do |
| 5529 | // not bother. SimplifyCFG should handle it. |
| 5530 | if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent)) |
| 5531 | return std::nullopt; |
| 5532 | |
| 5533 | auto *Term = UserParent->getTerminator(); |
| 5534 | // See if the user is one of our successors that has only one |
| 5535 | // predecessor, so that we don't have to split the critical edge. |
| 5536 | // Another option where we can sink is a block that ends with a |
| 5537 | // terminator that does not pass control to other block (such as |
| 5538 | // return or unreachable or resume). In this case: |
| 5539 | // - I dominates the User (by SSA form); |
| 5540 | // - the User will be executed at most once. |
| 5541 | // So sinking I down to User is always profitable or neutral. |
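// For example, it is fine to sink into a block that ends in "unreachable"
// after a noreturn call, even if that block has multiple predecessors: the
// sunk instruction still executes at most once.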
| 5542 | if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term)) |
| 5543 | return std::nullopt; |
| 5544 | |
assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
| 5546 | } |
| 5547 | |
| 5548 | NumUsers++; |
| 5549 | } |
| 5550 | |
// No users, or only droppable users.
| 5552 | if (!UserParent) |
| 5553 | return std::nullopt; |
| 5554 | |
| 5555 | return UserParent; |
| 5556 | }; |
| 5557 | |
| 5558 | auto OptBB = getOptionalSinkBlockForInst(I); |
| 5559 | if (OptBB) { |
| 5560 | auto *UserParent = *OptBB; |
| 5561 | // Okay, the CFG is simple enough, try to sink this instruction. |
| 5562 | if (tryToSinkInstruction(I, DestBlock: UserParent)) { |
| 5563 | LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); |
| 5564 | MadeIRChange = true; |
| 5565 | // We'll add uses of the sunk instruction below, but since |
// sinking can expose opportunities for its *operands*, add
// them to the worklist.
| 5568 | for (Use &U : I->operands()) |
| 5569 | if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get())) |
| 5570 | Worklist.push(I: OpI); |
| 5571 | } |
| 5572 | } |
| 5573 | |
| 5574 | // Now that we have an instruction, try combining it to simplify it. |
| 5575 | Builder.SetInsertPoint(I); |
| 5576 | Builder.CollectMetadataToCopy( |
| 5577 | Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); |
| 5578 | |
| 5579 | #ifndef NDEBUG |
| 5580 | std::string OrigI; |
| 5581 | #endif |
| 5582 | LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS);); |
| 5583 | LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); |
| 5584 | |
| 5585 | if (Instruction *Result = visit(I&: *I)) { |
| 5586 | ++NumCombined; |
| 5587 | // Should we replace the old instruction with a new one? |
| 5588 | if (Result != I) { |
| 5589 | LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' |
| 5590 | << " New = " << *Result << '\n'); |
| 5591 | |
| 5592 | // We copy the old instruction's DebugLoc to the new instruction, unless |
| 5593 | // InstCombine already assigned a DebugLoc to it, in which case we |
| 5594 | // should trust the more specifically selected DebugLoc. |
| 5595 | Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc())); |
| 5596 | // We also copy annotation metadata to the new instruction. |
| 5597 | Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation); |
| 5598 | // Everything uses the new instruction now. |
| 5599 | I->replaceAllUsesWith(V: Result); |
| 5600 | |
| 5601 | // Move the name to the new instruction first. |
| 5602 | Result->takeName(V: I); |
| 5603 | |
| 5604 | // Insert the new instruction into the basic block... |
| 5605 | BasicBlock *InstParent = I->getParent(); |
| 5606 | BasicBlock::iterator InsertPos = I->getIterator(); |
| 5607 | |
// Are we replacing a PHI with something that isn't a PHI, or vice versa?
| 5609 | if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) { |
| 5610 | // We need to fix up the insertion point. |
| 5611 | if (isa<PHINode>(Val: I)) // PHI -> Non-PHI |
| 5612 | InsertPos = InstParent->getFirstInsertionPt(); |
| 5613 | else // Non-PHI -> PHI |
| 5614 | InsertPos = InstParent->getFirstNonPHIIt(); |
| 5615 | } |
| 5616 | |
| 5617 | Result->insertInto(ParentBB: InstParent, It: InsertPos); |
| 5618 | |
| 5619 | // Push the new instruction and any users onto the worklist. |
| 5620 | Worklist.pushUsersToWorkList(I&: *Result); |
| 5621 | Worklist.push(I: Result); |
| 5622 | |
| 5623 | eraseInstFromFunction(I&: *I); |
| 5624 | } else { |
| 5625 | LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' |
| 5626 | << " New = " << *I << '\n'); |
| 5627 | |
| 5628 | // If the instruction was modified, it's possible that it is now dead. |
// If so, remove it.
| 5630 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
| 5631 | eraseInstFromFunction(I&: *I); |
| 5632 | } else { |
| 5633 | Worklist.pushUsersToWorkList(I&: *I); |
| 5634 | Worklist.push(I); |
| 5635 | } |
| 5636 | } |
| 5637 | MadeIRChange = true; |
| 5638 | } |
| 5639 | } |
| 5640 | |
| 5641 | Worklist.zap(); |
| 5642 | return MadeIRChange; |
| 5643 | } |
| 5644 | |
| 5645 | // Track the scopes used by !alias.scope and !noalias. In a function, a |
| 5646 | // @llvm.experimental.noalias.scope.decl is only useful if that scope is used |
| 5647 | // by both sets. If not, the declaration of the scope can be safely omitted. |
| 5648 | // The MDNode of the scope can be omitted as well for the instructions that are |
| 5649 | // part of this function. We do not do that at this point, as this might become |
| 5650 | // too time consuming to do. |
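// For example, a @llvm.experimental.noalias.scope.decl whose scope only ever
// appears in !alias.scope metadata (or only in !noalias metadata) within this
// function constrains nothing, and isNoAliasScopeDeclDead() will report it as
// dead.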
| 5651 | class AliasScopeTracker { |
| 5652 | SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists; |
| 5653 | SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists; |
| 5654 | |
| 5655 | public: |
| 5656 | void analyse(Instruction *I) { |
| 5657 | // This seems to be faster than checking 'mayReadOrWriteMemory()'. |
| 5658 | if (!I->hasMetadataOtherThanDebugLoc()) |
| 5659 | return; |
| 5660 | |
| 5661 | auto Track = [](Metadata *ScopeList, auto &Container) { |
| 5662 | const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList); |
| 5663 | if (!MDScopeList || !Container.insert(MDScopeList).second) |
| 5664 | return; |
| 5665 | for (const auto &MDOperand : MDScopeList->operands()) |
| 5666 | if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand)) |
| 5667 | Container.insert(MDScope); |
| 5668 | }; |
| 5669 | |
| 5670 | Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists); |
| 5671 | Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists); |
| 5672 | } |
| 5673 | |
| 5674 | bool isNoAliasScopeDeclDead(Instruction *Inst) { |
| 5675 | NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst); |
| 5676 | if (!Decl) |
| 5677 | return false; |
| 5678 | |
| 5679 | assert(Decl->use_empty() && |
| 5680 | "llvm.experimental.noalias.scope.decl in use ?" ); |
| 5681 | const MDNode *MDSL = Decl->getScopeList(); |
| 5682 | assert(MDSL->getNumOperands() == 1 && |
| 5683 | "llvm.experimental.noalias.scope should refer to a single scope" ); |
| 5684 | auto &MDOperand = MDSL->getOperand(I: 0); |
| 5685 | if (auto *MD = dyn_cast<MDNode>(Val: MDOperand)) |
| 5686 | return !UsedAliasScopesAndLists.contains(Ptr: MD) || |
| 5687 | !UsedNoAliasScopesAndLists.contains(Ptr: MD); |
| 5688 | |
// Not an MDNode? Throw it away.
| 5690 | return true; |
| 5691 | } |
| 5692 | }; |
| 5693 | |
| 5694 | /// Populate the IC worklist from a function, by walking it in reverse |
| 5695 | /// post-order and adding all reachable code to the worklist. |
| 5696 | /// |
| 5697 | /// This has a couple of tricks to make the code faster and more powerful. In |
| 5698 | /// particular, we constant fold and DCE instructions as we go, to avoid adding |
| 5699 | /// them to the worklist (this significantly speeds up instcombine on code where |
| 5700 | /// many instructions are dead or constant). Additionally, if we find a branch |
| 5701 | /// whose condition is a known constant, we only visit the reachable successors. |
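/// For example, given "br i1 true, label %live, label %dead", only %live is
/// treated as reachable; %dead's phi inputs coming from this block are
/// replaced with poison, and if %dead has no other live predecessors its
/// instructions are removed.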
| 5702 | bool InstCombinerImpl::prepareWorklist(Function &F) { |
| 5703 | bool MadeIRChange = false; |
| 5704 | SmallPtrSet<BasicBlock *, 32> LiveBlocks; |
| 5705 | SmallVector<Instruction *, 128> InstrsForInstructionWorklist; |
| 5706 | DenseMap<Constant *, Constant *> FoldedConstants; |
| 5707 | AliasScopeTracker SeenAliasScopes; |
| 5708 | |
| 5709 | auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) { |
| 5710 | for (BasicBlock *Succ : successors(BB)) |
| 5711 | if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second) |
| 5712 | for (PHINode &PN : Succ->phis()) |
| 5713 | for (Use &U : PN.incoming_values()) |
| 5714 | if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) { |
| 5715 | U.set(PoisonValue::get(T: PN.getType())); |
| 5716 | MadeIRChange = true; |
| 5717 | } |
| 5718 | }; |
| 5719 | |
| 5720 | for (BasicBlock *BB : RPOT) { |
| 5721 | if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
| 5722 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
| 5723 | })) { |
| 5724 | HandleOnlyLiveSuccessor(BB, nullptr); |
| 5725 | continue; |
| 5726 | } |
| 5727 | LiveBlocks.insert(Ptr: BB); |
| 5728 | |
| 5729 | for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) { |
// Constant fold the instruction if it is trivially constant.
| 5731 | if (!Inst.use_empty() && |
| 5732 | (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0)))) |
| 5733 | if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) { |
| 5734 | LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst |
| 5735 | << '\n'); |
| 5736 | Inst.replaceAllUsesWith(V: C); |
| 5737 | ++NumConstProp; |
| 5738 | if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI)) |
| 5739 | Inst.eraseFromParent(); |
| 5740 | MadeIRChange = true; |
| 5741 | continue; |
| 5742 | } |
| 5743 | |
| 5744 | // See if we can constant fold its operands. |
| 5745 | for (Use &U : Inst.operands()) { |
| 5746 | if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U)) |
| 5747 | continue; |
| 5748 | |
| 5749 | auto *C = cast<Constant>(Val&: U); |
| 5750 | Constant *&FoldRes = FoldedConstants[C]; |
| 5751 | if (!FoldRes) |
| 5752 | FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI); |
| 5753 | |
| 5754 | if (FoldRes != C) { |
| 5755 | LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst |
| 5756 | << "\n Old = " << *C |
| 5757 | << "\n New = " << *FoldRes << '\n'); |
| 5758 | U = FoldRes; |
| 5759 | MadeIRChange = true; |
| 5760 | } |
| 5761 | } |
| 5762 | |
| 5763 | // Skip processing debug and pseudo intrinsics in InstCombine. Processing |
// these call instructions consumes a non-trivial amount of time and
| 5765 | // provides no value for the optimization. |
| 5766 | if (!Inst.isDebugOrPseudoInst()) { |
| 5767 | InstrsForInstructionWorklist.push_back(Elt: &Inst); |
| 5768 | SeenAliasScopes.analyse(I: &Inst); |
| 5769 | } |
| 5770 | } |
| 5771 | |
| 5772 | // If this is a branch or switch on a constant, mark only the single |
| 5773 | // live successor. Otherwise assume all successors are live. |
| 5774 | Instruction *TI = BB->getTerminator(); |
| 5775 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) { |
| 5776 | if (isa<UndefValue>(Val: BI->getCondition())) { |
| 5777 | // Branch on undef is UB. |
| 5778 | HandleOnlyLiveSuccessor(BB, nullptr); |
| 5779 | continue; |
| 5780 | } |
| 5781 | if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
| 5782 | bool CondVal = Cond->getZExtValue(); |
| 5783 | HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal)); |
| 5784 | continue; |
| 5785 | } |
| 5786 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 5787 | if (isa<UndefValue>(Val: SI->getCondition())) { |
| 5788 | // Switch on undef is UB. |
| 5789 | HandleOnlyLiveSuccessor(BB, nullptr); |
| 5790 | continue; |
| 5791 | } |
| 5792 | if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) { |
| 5793 | HandleOnlyLiveSuccessor(BB, |
| 5794 | SI->findCaseValue(C: Cond)->getCaseSuccessor()); |
| 5795 | continue; |
| 5796 | } |
| 5797 | } |
| 5798 | } |
| 5799 | |
| 5800 | // Remove instructions inside unreachable blocks. This prevents the |
| 5801 | // instcombine code from having to deal with some bad special cases, and |
| 5802 | // reduces use counts of instructions. |
| 5803 | for (BasicBlock &BB : F) { |
| 5804 | if (LiveBlocks.count(Ptr: &BB)) |
| 5805 | continue; |
| 5806 | |
unsigned NumDeadInstInBB =
removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
| 5809 | |
| 5810 | MadeIRChange |= NumDeadInstInBB != 0; |
| 5811 | NumDeadInst += NumDeadInstInBB; |
| 5812 | } |
| 5813 | |
| 5814 | // Once we've found all of the instructions to add to instcombine's worklist, |
| 5815 | // add them in reverse order. This way instcombine will visit from the top |
| 5816 | // of the function down. This jives well with the way that it adds all uses |
| 5817 | // of instructions to the worklist after doing a transformation, thus avoiding |
| 5818 | // some N^2 behavior in pathological cases. |
| 5819 | Worklist.reserve(Size: InstrsForInstructionWorklist.size()); |
| 5820 | for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) { |
| 5821 | // DCE instruction if trivially dead. As we iterate in reverse program |
| 5822 | // order here, we will clean up whole chains of dead instructions. |
| 5823 | if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) || |
| 5824 | SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { |
| 5825 | ++NumDeadInst; |
| 5826 | LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); |
| 5827 | salvageDebugInfo(I&: *Inst); |
| 5828 | Inst->eraseFromParent(); |
| 5829 | MadeIRChange = true; |
| 5830 | continue; |
| 5831 | } |
| 5832 | |
| 5833 | Worklist.push(I: Inst); |
| 5834 | } |
| 5835 | |
| 5836 | return MadeIRChange; |
| 5837 | } |
| 5838 | |
| 5839 | void InstCombiner::computeBackEdges() { |
| 5840 | // Collect backedges. |
| 5841 | SmallPtrSet<BasicBlock *, 16> Visited; |
| 5842 | for (BasicBlock *BB : RPOT) { |
| 5843 | Visited.insert(Ptr: BB); |
| 5844 | for (BasicBlock *Succ : successors(BB)) |
| 5845 | if (Visited.contains(Ptr: Succ)) |
| 5846 | BackEdges.insert(V: {BB, Succ}); |
| 5847 | } |
| 5848 | ComputedBackEdges = true; |
| 5849 | } |
| 5850 | |
| 5851 | static bool combineInstructionsOverFunction( |
| 5852 | Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, |
| 5853 | AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, |
| 5854 | DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
| 5855 | BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, |
| 5856 | const InstCombineOptions &Opts) { |
| 5857 | auto &DL = F.getDataLayout(); |
| 5858 | bool VerifyFixpoint = Opts.VerifyFixpoint && |
!F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");
| 5860 | |
| 5861 | /// Builder - This is an IRBuilder that automatically inserts new |
| 5862 | /// instructions into the worklist when they are created. |
| 5863 | IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( |
| 5864 | F.getContext(), TargetFolder(DL), |
| 5865 | IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { |
| 5866 | Worklist.add(I); |
| 5867 | if (auto *Assume = dyn_cast<AssumeInst>(Val: I)) |
| 5868 | AC.registerAssumption(CI: Assume); |
| 5869 | })); |
| 5870 | |
| 5871 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front()); |
| 5872 | |
// Lower dbg.declare intrinsics, otherwise their value may be clobbered
// by the instcombiner.
| 5875 | bool MadeIRChange = false; |
| 5876 | if (ShouldLowerDbgDeclare) |
| 5877 | MadeIRChange = LowerDbgDeclare(F); |
| 5878 | |
| 5879 | // Iterate while there is work to do. |
| 5880 | unsigned Iteration = 0; |
| 5881 | while (true) { |
| 5882 | if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) { |
| 5883 | LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations |
| 5884 | << " on " << F.getName() |
| 5885 | << " reached; stopping without verifying fixpoint\n" ); |
| 5886 | break; |
| 5887 | } |
| 5888 | |
| 5889 | ++Iteration; |
| 5890 | ++NumWorklistIterations; |
| 5891 | LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " |
<< F.getName() << "\n");
| 5893 | |
| 5894 | InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, |
| 5895 | ORE, BFI, BPI, PSI, DL, RPOT); |
| 5896 | IC.MaxArraySizeForCombine = MaxArraySize; |
| 5897 | bool MadeChangeInThisIteration = IC.prepareWorklist(F); |
| 5898 | MadeChangeInThisIteration |= IC.run(); |
| 5899 | if (!MadeChangeInThisIteration) |
| 5900 | break; |
| 5901 | |
| 5902 | MadeIRChange = true; |
| 5903 | if (Iteration > Opts.MaxIterations) { |
| 5904 | reportFatalUsageError( |
| 5905 | reason: "Instruction Combining on " + Twine(F.getName()) + |
| 5906 | " did not reach a fixpoint after " + Twine(Opts.MaxIterations) + |
| 5907 | " iterations. " + |
| 5908 | "Use 'instcombine<no-verify-fixpoint>' or function attribute " |
| 5909 | "'instcombine-no-verify-fixpoint' to suppress this error." ); |
| 5910 | } |
| 5911 | } |
| 5912 | |
| 5913 | if (Iteration == 1) |
| 5914 | ++NumOneIteration; |
| 5915 | else if (Iteration == 2) |
| 5916 | ++NumTwoIterations; |
| 5917 | else if (Iteration == 3) |
| 5918 | ++NumThreeIterations; |
| 5919 | else |
| 5920 | ++NumFourOrMoreIterations; |
| 5921 | |
| 5922 | return MadeIRChange; |
| 5923 | } |
| 5924 | |
| 5925 | InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} |
| 5926 | |
| 5927 | void InstCombinePass::printPipeline( |
| 5928 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
| 5929 | static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline( |
| 5930 | OS, MapClassName2PassName); |
| 5931 | OS << '<'; |
| 5932 | OS << "max-iterations=" << Options.MaxIterations << ";" ; |
| 5933 | OS << (Options.VerifyFixpoint ? "" : "no-" ) << "verify-fixpoint" ; |
| 5934 | OS << '>'; |
| 5935 | } |
| 5936 | |
| 5937 | char InstCombinePass::ID = 0; |
| 5938 | |
| 5939 | PreservedAnalyses InstCombinePass::run(Function &F, |
| 5940 | FunctionAnalysisManager &AM) { |
| 5941 | auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F); |
| 5942 | // No changes since last InstCombine pass, exit early. |
| 5943 | if (LRT.shouldSkip(ID: &ID)) |
| 5944 | return PreservedAnalyses::all(); |
| 5945 | |
| 5946 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
| 5947 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 5948 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
| 5949 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
| 5950 | auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
| 5951 | |
| 5952 | auto *AA = &AM.getResult<AAManager>(IR&: F); |
| 5953 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F); |
| 5954 | ProfileSummaryInfo *PSI = |
| 5955 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent()); |
| 5956 | auto *BFI = (PSI && PSI->hasProfileSummary()) ? |
| 5957 | &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr; |
| 5958 | auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F); |
| 5959 | |
| 5960 | if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
| 5961 | BFI, BPI, PSI, Opts: Options)) { |
| 5962 | // No changes, all analyses are preserved. |
| 5963 | LRT.update(ID: &ID, /*Changed=*/false); |
| 5964 | return PreservedAnalyses::all(); |
| 5965 | } |
| 5966 | |
| 5967 | // Mark all the analyses that instcombine updates as preserved. |
| 5968 | PreservedAnalyses PA; |
| 5969 | LRT.update(ID: &ID, /*Changed=*/true); |
| 5970 | PA.preserve<LastRunTrackingAnalysis>(); |
| 5971 | PA.preserveSet<CFGAnalyses>(); |
| 5972 | return PA; |
| 5973 | } |
| 5974 | |
| 5975 | void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| 5976 | AU.setPreservesCFG(); |
| 5977 | AU.addRequired<AAResultsWrapperPass>(); |
| 5978 | AU.addRequired<AssumptionCacheTracker>(); |
| 5979 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| 5980 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
| 5981 | AU.addRequired<DominatorTreeWrapperPass>(); |
| 5982 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
| 5983 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 5984 | AU.addPreserved<AAResultsWrapperPass>(); |
| 5985 | AU.addPreserved<BasicAAWrapperPass>(); |
| 5986 | AU.addPreserved<GlobalsAAWrapperPass>(); |
| 5987 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
| 5988 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); |
| 5989 | } |
| 5990 | |
| 5991 | bool InstructionCombiningPass::runOnFunction(Function &F) { |
| 5992 | if (skipFunction(F)) |
| 5993 | return false; |
| 5994 | |
| 5995 | // Required analyses. |
| 5996 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
| 5997 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
| 5998 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| 5999 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
| 6000 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 6001 | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
| 6002 | |
| 6003 | // Optional analyses. |
| 6004 | ProfileSummaryInfo *PSI = |
| 6005 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
| 6006 | BlockFrequencyInfo *BFI = |
| 6007 | (PSI && PSI->hasProfileSummary()) ? |
| 6008 | &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : |
| 6009 | nullptr; |
| 6010 | BranchProbabilityInfo *BPI = nullptr; |
| 6011 | if (auto *WrapperPass = |
| 6012 | getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>()) |
| 6013 | BPI = &WrapperPass->getBPI(); |
| 6014 | |
| 6015 | return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
| 6016 | BFI, BPI, PSI, Opts: InstCombineOptions()); |
| 6017 | } |
| 6018 | |
| 6019 | char InstructionCombiningPass::ID = 0; |
| 6020 | |
| 6021 | InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { |
| 6022 | initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); |
| 6023 | } |
| 6024 | |
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
| 6027 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
| 6028 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
| 6029 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
| 6030 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 6031 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
| 6032 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
| 6033 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
| 6034 | INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) |
| 6035 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
| 6038 | |
| 6039 | // Initialization Routines |
| 6040 | void llvm::initializeInstCombine(PassRegistry &Registry) { |
| 6041 | initializeInstructionCombiningPassPass(Registry); |
| 6042 | } |
| 6043 | |
| 6044 | FunctionPass *llvm::createInstructionCombiningPass() { |
| 6045 | return new InstructionCombiningPass(); |
| 6046 | } |
| 6047 | |