| 1 | //===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "VPlanUtils.h" |
| 10 | #include "LoopVectorizationPlanner.h" |
| 11 | #include "VPlanAnalysis.h" |
| 12 | #include "VPlanCFG.h" |
| 13 | #include "VPlanDominatorTree.h" |
| 14 | #include "VPlanPatternMatch.h" |
| 15 | #include "llvm/ADT/TypeSwitch.h" |
| 16 | #include "llvm/Analysis/MemoryLocation.h" |
| 17 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 18 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
| 19 | #include "llvm/IR/Dominators.h" |
| 20 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
| 21 | |
| 22 | using namespace llvm; |
| 23 | using namespace llvm::VPlanPatternMatch; |
| 24 | using namespace llvm::SCEVPatternMatch; |
| 25 | |
| 26 | bool vputils::onlyFirstLaneUsed(const VPValue *Def) { |
| 27 | return all_of(Range: Def->users(), |
| 28 | P: [Def](const VPUser *U) { return U->usesFirstLaneOnly(Op: Def); }); |
| 29 | } |
| 30 | |
| 31 | bool vputils::onlyFirstPartUsed(const VPValue *Def) { |
| 32 | return all_of(Range: Def->users(), |
| 33 | P: [Def](const VPUser *U) { return U->usesFirstPartOnly(Op: Def); }); |
| 34 | } |
| 35 | |
| 36 | bool vputils::onlyScalarValuesUsed(const VPValue *Def) { |
| 37 | return all_of(Range: Def->users(), |
| 38 | P: [Def](const VPUser *U) { return U->usesScalars(Op: Def); }); |
| 39 | } |
| 40 | |
| 41 | VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { |
| 42 | if (auto *E = dyn_cast<SCEVConstant>(Val: Expr)) |
| 43 | return Plan.getOrAddLiveIn(V: E->getValue()); |
| 44 | // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction |
| 45 | // value. Otherwise the value may be defined in a loop and using it directly |
| 46 | // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA |
| 47 | // form. |
| 48 | auto *U = dyn_cast<SCEVUnknown>(Val: Expr); |
| 49 | if (U && !isa<Instruction>(Val: U->getValue())) |
| 50 | return Plan.getOrAddLiveIn(V: U->getValue()); |
| 51 | auto *Expanded = new VPExpandSCEVRecipe(Expr); |
| 52 | VPBasicBlock *EntryVPBB = Plan.getEntry(); |
| 53 | auto Iter = EntryVPBB->getFirstNonPhi(); |
| 54 | while (Iter != EntryVPBB->end() && isa<VPIRInstruction>(Val: *Iter)) |
| 55 | ++Iter; |
| 56 | EntryVPBB->insert(Recipe: Expanded, InsertPt: Iter); |
| 57 | return Expanded; |
| 58 | } |
| 59 | |
| 60 | bool vputils::(const VPValue *V, const VPlan &Plan) { |
| 61 | if (isa<VPActiveLaneMaskPHIRecipe>(Val: V)) |
| 62 | return true; |
| 63 | |
| 64 | VPValue *A, *B; |
| 65 | |
| 66 | auto m_CanonicalScalarIVSteps = m_ScalarIVSteps( |
| 67 | Op0: m_CombineOr(Ps: m_CanonicalIV(), |
| 68 | Ps: m_DerivedIV(Op0: m_ZeroInt(), Op1: m_CanonicalIV(), Op2: m_One())), |
| 69 | Op1: m_One(), Op2: m_Specific(VPV: &Plan.getVF())); |
| 70 | |
| 71 | // A wide canonical IV is either an explicit VPWidenCanonicalIVRecipe or a |
| 72 | // canonical VPWidenIntOrFpInductionRecipe. |
| 73 | auto m_WideCanonicalIV = |
| 74 | m_CombineOr(Ps: m_Isa<VPWidenCanonicalIVRecipe>(), Ps: m_CanonicalWidenIV()); |
| 75 | |
| 76 | if (match(V, P: m_ActiveLaneMask(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B), Op2: m_One()))) |
| 77 | return B == Plan.getTripCount() && |
| 78 | (match(V: A, P: m_CanonicalScalarIVSteps) || match(V: A, P: m_WideCanonicalIV)); |
| 79 | |
| 80 | // For scalar plans, the header mask uses the scalar steps. |
| 81 | if (match(V, P: m_ICmp(Op0: m_CanonicalScalarIVSteps, |
| 82 | Op1: m_Specific(VPV: Plan.getBackedgeTakenCount())))) { |
| 83 | assert(Plan.hasScalarVFOnly() && |
| 84 | "Non-scalar VF using scalar IV steps for header mask?" ); |
| 85 | return true; |
| 86 | } |
| 87 | |
| 88 | auto MaskMatch = m_ICmp(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B)); |
| 89 | if (match(V, P: m_CombineOr(Ps: MaskMatch, Ps: m_Reverse(Op0: MaskMatch)))) |
| 90 | return match(V: A, P: m_WideCanonicalIV) && B == Plan.getBackedgeTakenCount(); |
| 91 | |
| 92 | return match( |
| 93 | V, P: m_c_BinaryAnd(Op0: m_VPValue(), |
| 94 | Op1: m_VPInstruction<VPInstruction::IncomingAliasMask>())); |
| 95 | } |
| 96 | |
| 97 | /// Returns true if \p R propagates poison from any operand to its result. |
| 98 | static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) { |
| 99 | return TypeSwitch<const VPRecipeBase *, bool>(R) |
| 100 | .Case<VPWidenGEPRecipe, VPWidenCastRecipe>( |
| 101 | caseFn: [](const VPRecipeBase *) { return true; }) |
| 102 | .Case(caseFn: [](const VPReplicateRecipe *Rep) { |
| 103 | // GEP and casts propagate poison from all operands. |
| 104 | unsigned Opcode = Rep->getOpcode(); |
| 105 | return Opcode == Instruction::GetElementPtr || |
| 106 | Instruction::isCast(Opcode); |
| 107 | }) |
| 108 | .Default(defaultFn: [](const VPRecipeBase *) { return false; }); |
| 109 | } |
| 110 | |
| 111 | /// Returns true if \p V being poison is guaranteed to trigger UB because it |
| 112 | /// propagates to the address of a memory recipe. |
| 113 | static bool poisonGuaranteesUB(const VPValue *V) { |
| 114 | SmallPtrSet<const VPValue *, 8> Visited; |
| 115 | SmallVector<const VPValue *, 16> Worklist; |
| 116 | |
| 117 | Worklist.push_back(Elt: V); |
| 118 | |
| 119 | while (!Worklist.empty()) { |
| 120 | const VPValue *Current = Worklist.pop_back_val(); |
| 121 | if (!Visited.insert(Ptr: Current).second) |
| 122 | continue; |
| 123 | |
| 124 | for (VPUser *U : Current->users()) { |
| 125 | // Check if Current is used as an address operand for load/store. |
| 126 | if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: cast<VPRecipeBase>(Val: U))) { |
| 127 | if (MemR->getAddr() == Current) |
| 128 | return true; |
| 129 | continue; |
| 130 | } |
| 131 | if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: U)) { |
| 132 | unsigned Opcode = Rep->getOpcode(); |
| 133 | if ((Opcode == Instruction::Load && Rep->getOperand(N: 0) == Current) || |
| 134 | (Opcode == Instruction::Store && Rep->getOperand(N: 1) == Current)) |
| 135 | return true; |
| 136 | } |
| 137 | |
| 138 | // Check if poison propagates through this recipe to any of its users. |
| 139 | auto *R = cast<VPRecipeBase>(Val: U); |
| 140 | for (const VPValue *Op : R->operands()) { |
| 141 | if (Op == Current && propagatesPoisonFromRecipeOp(R)) { |
| 142 | Worklist.push_back(Elt: R->getVPSingleValue()); |
| 143 | break; |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | return false; |
| 150 | } |
| 151 | |
| 152 | GEPNoWrapFlags vputils::getGEPFlagsForPtr(VPValue *Ptr) { |
| 153 | // Like IR stripPointerCasts, look through GEPs with all-zero indices and |
| 154 | // casts to find a root GEP VPInstruction. |
| 155 | while (auto *PtrVPI = dyn_cast<VPInstruction>(Val: Ptr)) { |
| 156 | unsigned Opcode = PtrVPI->getOpcode(); |
| 157 | if (Opcode == Instruction::GetElementPtr) { |
| 158 | if (!all_of(Range: drop_begin(RangeOrContainer: PtrVPI->operands()), P: match_fn(P: m_ZeroInt()))) |
| 159 | return PtrVPI->getGEPNoWrapFlags(); |
| 160 | Ptr = PtrVPI->getOperand(N: 0); |
| 161 | continue; |
| 162 | } |
| 163 | if (Opcode != Instruction::BitCast && Opcode != Instruction::AddrSpaceCast) |
| 164 | break; |
| 165 | Ptr = PtrVPI->getOperand(N: 0); |
| 166 | } |
| 167 | return GEPNoWrapFlags::none(); |
| 168 | } |
| 169 | |
| 170 | const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, |
| 171 | PredicatedScalarEvolution &PSE, |
| 172 | const Loop *L) { |
| 173 | ScalarEvolution &SE = *PSE.getSE(); |
| 174 | if (isa<VPIRValue, VPSymbolicValue>(Val: V)) { |
| 175 | Value *LiveIn = V->getUnderlyingValue(); |
| 176 | if (LiveIn && SE.isSCEVable(Ty: LiveIn->getType())) |
| 177 | return SE.getSCEV(V: LiveIn); |
| 178 | return SE.getCouldNotCompute(); |
| 179 | } |
| 180 | |
| 181 | if (auto *RV = dyn_cast<VPRegionValue>(Val: V)) { |
| 182 | assert(RV == RV->getDefiningRegion()->getCanonicalIV() && |
| 183 | "RegionValue must be canonical IV" ); |
| 184 | if (!L) |
| 185 | return SE.getCouldNotCompute(); |
| 186 | return SE.getAddRecExpr(Start: SE.getZero(Ty: RV->getType()), Step: SE.getOne(Ty: RV->getType()), |
| 187 | L, Flags: SCEV::FlagAnyWrap); |
| 188 | } |
| 189 | |
| 190 | // Helper to create SCEVs for binary and unary operations. |
| 191 | auto CreateSCEV = [&](ArrayRef<VPValue *> Ops, |
| 192 | function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn) |
| 193 | -> const SCEV * { |
| 194 | SmallVector<SCEVUse, 2> SCEVOps; |
| 195 | for (VPValue *Op : Ops) { |
| 196 | const SCEV *S = getSCEVExprForVPValue(V: Op, PSE, L); |
| 197 | if (isa<SCEVCouldNotCompute>(Val: S)) |
| 198 | return SE.getCouldNotCompute(); |
| 199 | SCEVOps.push_back(Elt: S); |
| 200 | } |
| 201 | return PSE.getPredicatedSCEV(Expr: CreateFn(SCEVOps)); |
| 202 | }; |
| 203 | |
| 204 | VPValue *LHSVal, *RHSVal; |
| 205 | if (match(V, P: m_Add(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 206 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 207 | return SE.getAddExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 208 | }); |
| 209 | if (match(V, P: m_Sub(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 210 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 211 | return SE.getMinusSCEV(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 212 | }); |
| 213 | if (match(V, P: m_Not(Op0: m_VPValue(V&: LHSVal)))) { |
| 214 | // not X = xor X, -1 = -1 - X |
| 215 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 216 | return SE.getMinusSCEV(LHS: SE.getMinusOne(Ty: Ops[0]->getType()), RHS: Ops[0]); |
| 217 | }); |
| 218 | } |
| 219 | if (match(V, P: m_Mul(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 220 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 221 | return SE.getMulExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 222 | }); |
| 223 | // Handle shl by constant: x << c is equivalent to x * (1 << c). A shift |
| 224 | // amount >= the bit width produces poison; do not rewrite it, as |
| 225 | // getPowerOfTwo requires the power to be in range. |
| 226 | uint64_t ShiftAmt; |
| 227 | if (match(V, P: m_Shl(Op0: m_VPValue(V&: LHSVal), Op1: m_ConstantInt(C&: ShiftAmt))) && |
| 228 | ShiftAmt < LHSVal->getScalarType()->getScalarSizeInBits()) |
| 229 | return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) { |
| 230 | return SE.getMulExpr(LHS: Ops[0], |
| 231 | RHS: SE.getPowerOfTwo(Ty: Ops[0]->getType(), Power: ShiftAmt)); |
| 232 | }); |
| 233 | if (match(V, P: m_LShr(Op0: m_VPValue(V&: LHSVal), Op1: m_ConstantInt(C&: ShiftAmt)))) { |
| 234 | Type *Ty = V->getScalarType(); |
| 235 | if (ShiftAmt < SE.getTypeSizeInBits(Ty)) |
| 236 | return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) { |
| 237 | return SE.getUDivExpr(LHS: Ops[0], RHS: SE.getPowerOfTwo(Ty, Power: ShiftAmt)); |
| 238 | }); |
| 239 | } |
| 240 | if (match(V, P: m_UDiv(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 241 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 242 | return SE.getUDivExpr(LHS: Ops[0], RHS: Ops[1]); |
| 243 | }); |
| 244 | if (match(V, P: m_URem(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 245 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 246 | return SE.getURemExpr(LHS: Ops[0], RHS: Ops[1]); |
| 247 | }); |
| 248 | // A SRem with non-negative operands is equivalent to an URem. |
| 249 | if (match(V, P: m_SRem(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) { |
| 250 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 251 | if (!SE.isKnownNonNegative(S: Ops[0]) || !SE.isKnownNonNegative(S: Ops[1])) |
| 252 | return SE.getCouldNotCompute(); |
| 253 | return SE.getURemExpr(LHS: Ops[0], RHS: Ops[1]); |
| 254 | }); |
| 255 | } |
| 256 | // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n. |
| 257 | const APInt *Mask; |
| 258 | if (match(V, P: m_c_BinaryAnd(Op0: m_VPValue(V&: LHSVal), Op1: m_APInt(C&: Mask))) && |
| 259 | (*Mask + 1).isPowerOf2()) |
| 260 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 261 | return SE.getURemExpr(LHS: Ops[0], RHS: SE.getConstant(Val: *Mask + 1)); |
| 262 | }); |
| 263 | if (match(V, P: m_Trunc(Op0: m_VPValue(V&: LHSVal)))) { |
| 264 | Type *DestTy = V->getScalarType(); |
| 265 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 266 | return SE.getTruncateExpr(Op: Ops[0], Ty: DestTy); |
| 267 | }); |
| 268 | } |
| 269 | if (match(V, P: m_ZExt(Op0: m_VPValue(V&: LHSVal)))) { |
| 270 | Type *DestTy = V->getScalarType(); |
| 271 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 272 | return SE.getZeroExtendExpr(Op: Ops[0], Ty: DestTy); |
| 273 | }); |
| 274 | } |
| 275 | if (match(V, P: m_SExt(Op0: m_VPValue(V&: LHSVal)))) { |
| 276 | Type *DestTy = V->getScalarType(); |
| 277 | |
| 278 | // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension |
| 279 | // onto the operands before computing the subtraction. |
| 280 | VPValue *SubLHS, *SubRHS; |
| 281 | auto *SubR = dyn_cast<VPRecipeWithIRFlags>(Val: LHSVal); |
| 282 | if (match(V: LHSVal, P: m_Sub(Op0: m_VPValue(V&: SubLHS), Op1: m_VPValue(V&: SubRHS))) && SubR && |
| 283 | SubR->hasNoSignedWrap() && poisonGuaranteesUB(V: LHSVal)) { |
| 284 | const SCEV *V1 = getSCEVExprForVPValue(V: SubLHS, PSE, L); |
| 285 | const SCEV *V2 = getSCEVExprForVPValue(V: SubRHS, PSE, L); |
| 286 | if (!isa<SCEVCouldNotCompute>(Val: V1) && !isa<SCEVCouldNotCompute>(Val: V2)) |
| 287 | return SE.getMinusSCEV(LHS: SE.getSignExtendExpr(Op: V1, Ty: DestTy), |
| 288 | RHS: SE.getSignExtendExpr(Op: V2, Ty: DestTy), Flags: SCEV::FlagNSW); |
| 289 | } |
| 290 | |
| 291 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 292 | return SE.getSignExtendExpr(Op: Ops[0], Ty: DestTy); |
| 293 | }); |
| 294 | } |
| 295 | if (match(V, |
| 296 | P: m_Intrinsic<Intrinsic::umax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 297 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 298 | return SE.getUMaxExpr(LHS: Ops[0], RHS: Ops[1]); |
| 299 | }); |
| 300 | if (match(V, |
| 301 | P: m_Intrinsic<Intrinsic::smax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 302 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 303 | return SE.getSMaxExpr(LHS: Ops[0], RHS: Ops[1]); |
| 304 | }); |
| 305 | if (match(V, |
| 306 | P: m_Intrinsic<Intrinsic::umin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 307 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 308 | return SE.getUMinExpr(LHS: Ops[0], RHS: Ops[1]); |
| 309 | }); |
| 310 | if (match(V, |
| 311 | P: m_Intrinsic<Intrinsic::smin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 312 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 313 | return SE.getSMinExpr(LHS: Ops[0], RHS: Ops[1]); |
| 314 | }); |
| 315 | if (match(V, P: m_Intrinsic<Intrinsic::abs>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue()))) |
| 316 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 317 | // is_int_min_poison is local to this intrinsic: poison on INT_MIN is |
| 318 | // not proof that the input is never INT_MIN, nor that poison reaches |
| 319 | // UB. Do not translate it to SCEV's global IsNSW flag. |
| 320 | return SE.getAbsExpr(Op: Ops[0], /*IsNSW=*/false); |
| 321 | }); |
| 322 | |
| 323 | ArrayRef<VPValue *> Ops; |
| 324 | Type *SourceElementType; |
| 325 | if (match(V, P: m_GetElementPtr(SourceElementType, Operands&: Ops))) { |
| 326 | return CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) { |
| 327 | return SE.getGEPExpr(BaseExpr: Ops.front(), IndexExprs: Ops.drop_front(), SrcElementTy: SourceElementType); |
| 328 | }); |
| 329 | } |
| 330 | |
| 331 | // TODO: Support constructing SCEVs for more recipes as needed. |
| 332 | const VPRecipeBase *DefR = V->getDefiningRecipe(); |
| 333 | const SCEV *Expr = |
| 334 | TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR) |
| 335 | .Case(caseFn: [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) |
| 336 | .Case(caseFn: [&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) { |
| 337 | const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L); |
| 338 | if (!L || isa<SCEVCouldNotCompute>(Val: Step)) |
| 339 | return SE.getCouldNotCompute(); |
| 340 | const SCEV *Start = |
| 341 | getSCEVExprForVPValue(V: R->getStartValue(), PSE, L); |
| 342 | const SCEV *AddRec = |
| 343 | SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap); |
| 344 | if (R->getTruncInst()) |
| 345 | return SE.getTruncateExpr(Op: AddRec, Ty: R->getScalarType()); |
| 346 | return AddRec; |
| 347 | }) |
| 348 | .Case(caseFn: [&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) { |
| 349 | const SCEV *Start = |
| 350 | getSCEVExprForVPValue(V: R->getStartValue(), PSE, L); |
| 351 | if (!L || isa<SCEVCouldNotCompute>(Val: Start)) |
| 352 | return SE.getCouldNotCompute(); |
| 353 | const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L); |
| 354 | if (isa<SCEVCouldNotCompute>(Val: Step)) |
| 355 | return SE.getCouldNotCompute(); |
| 356 | return SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap); |
| 357 | }) |
| 358 | .Case(caseFn: [&SE, &PSE, L](const VPDerivedIVRecipe *R) { |
| 359 | const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L); |
| 360 | const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L); |
| 361 | const SCEV *Scale = getSCEVExprForVPValue(V: R->getOperand(N: 2), PSE, L); |
| 362 | if (any_of(Range: ArrayRef({Start, IV, Scale}), |
| 363 | P: IsaPred<SCEVCouldNotCompute>)) |
| 364 | return SE.getCouldNotCompute(); |
| 365 | |
| 366 | return SE.getAddExpr( |
| 367 | LHS: SE.getTruncateOrSignExtend(V: Start, Ty: IV->getType()), |
| 368 | RHS: SE.getMulExpr( |
| 369 | LHS: IV, RHS: SE.getTruncateOrSignExtend(V: Scale, Ty: IV->getType()))); |
| 370 | }) |
| 371 | .Case(caseFn: [&SE, &PSE, L](const VPScalarIVStepsRecipe *R) { |
| 372 | const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L); |
| 373 | const SCEV *Step = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L); |
| 374 | if (isa<SCEVCouldNotCompute>(Val: IV) || !isa<SCEVConstant>(Val: Step)) |
| 375 | return SE.getCouldNotCompute(); |
| 376 | return SE.getTruncateOrSignExtend(V: IV, Ty: Step->getType()); |
| 377 | }) |
| 378 | .Default( |
| 379 | defaultFn: [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); |
| 380 | |
| 381 | return PSE.getPredicatedSCEV(Expr); |
| 382 | } |
| 383 | |
| 384 | bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, |
| 385 | const Loop *L) { |
| 386 | // If address is an SCEVAddExpr, we require that all operands must be either |
| 387 | // be invariant or a (possibly sign-extend) affine AddRec. |
| 388 | if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) { |
| 389 | return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) { |
| 390 | return SE.isLoopInvariant(S: Op, L) || |
| 391 | match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) || |
| 392 | match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV())); |
| 393 | }); |
| 394 | } |
| 395 | |
| 396 | // Otherwise, check if address is loop invariant or an affine add recurrence. |
| 397 | return SE.isLoopInvariant(S: Addr, L) || |
| 398 | match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV())); |
| 399 | } |
| 400 | |
| 401 | /// Returns true if \p Opcode preserves uniformity, i.e., if all operands are |
| 402 | /// uniform, the result will also be uniform. |
| 403 | static bool preservesUniformity(unsigned Opcode) { |
| 404 | if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) |
| 405 | return true; |
| 406 | switch (Opcode) { |
| 407 | case Instruction::Freeze: |
| 408 | case Instruction::GetElementPtr: |
| 409 | case Instruction::ICmp: |
| 410 | case Instruction::FCmp: |
| 411 | case Instruction::Select: |
| 412 | case VPInstruction::Not: |
| 413 | case VPInstruction::Broadcast: |
| 414 | case VPInstruction::MaskedCond: |
| 415 | case VPInstruction::PtrAdd: |
| 416 | return true; |
| 417 | default: |
| 418 | return false; |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | bool vputils::isSingleScalar(const VPValue *VPV) { |
| 423 | // Live-in, symbolic and region-values represent single-scalar values. |
| 424 | if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(Val: VPV)) |
| 425 | return true; |
| 426 | |
| 427 | if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: VPV)) { |
| 428 | const VPRegionBlock *RegionOfR = Rep->getRegion(); |
| 429 | // Don't consider recipes in replicate regions as uniform yet; their first |
| 430 | // lane cannot be accessed when executing the replicate region for other |
| 431 | // lanes. |
| 432 | if (RegionOfR && RegionOfR->isReplicator()) |
| 433 | return false; |
| 434 | return Rep->isSingleScalar() || (preservesUniformity(Opcode: Rep->getOpcode()) && |
| 435 | all_of(Range: Rep->operands(), P: isSingleScalar)); |
| 436 | } |
| 437 | if (isa<VPWidenGEPRecipe, VPBlendRecipe>(Val: VPV)) |
| 438 | return all_of(Range: VPV->getDefiningRecipe()->operands(), P: isSingleScalar); |
| 439 | if (auto *WidenR = dyn_cast<VPWidenRecipe>(Val: VPV)) { |
| 440 | return preservesUniformity(Opcode: WidenR->getOpcode()) && |
| 441 | all_of(Range: WidenR->operands(), P: isSingleScalar); |
| 442 | } |
| 443 | if (auto *VPI = dyn_cast<VPInstruction>(Val: VPV)) |
| 444 | return VPI->isSingleScalar() || VPI->isVectorToScalar() || |
| 445 | (preservesUniformity(Opcode: VPI->getOpcode()) && |
| 446 | all_of(Range: VPI->operands(), P: isSingleScalar)); |
| 447 | if (auto *RR = dyn_cast<VPReductionRecipe>(Val: VPV)) |
| 448 | return !RR->isPartialReduction(); |
| 449 | if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, VPDerivedIVRecipe>( |
| 450 | Val: VPV)) |
| 451 | return true; |
| 452 | if (auto *Expr = dyn_cast<VPExpressionRecipe>(Val: VPV)) |
| 453 | return Expr->isVectorToScalar(); |
| 454 | |
| 455 | // VPExpandSCEVRecipes must be placed in the entry and are always uniform. |
| 456 | return isa<VPExpandSCEVRecipe>(Val: VPV); |
| 457 | } |
| 458 | |
| 459 | bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) { |
| 460 | // Live-ins and region values are uniform. |
| 461 | if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(Val: V)) |
| 462 | return true; |
| 463 | |
| 464 | const VPRecipeBase *R = V->getDefiningRecipe(); |
| 465 | const VPBasicBlock *VPBB = R ? R->getParent() : nullptr; |
| 466 | const VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr; |
| 467 | if (VPBB) { |
| 468 | if ((VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) { |
| 469 | if (match(V: V->getDefiningRecipe(), |
| 470 | P: m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>())) |
| 471 | return false; |
| 472 | return all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 473 | } |
| 474 | } |
| 475 | |
| 476 | return TypeSwitch<const VPRecipeBase *, bool>(R) |
| 477 | .Case(caseFn: [](const VPDerivedIVRecipe *R) { return true; }) |
| 478 | .Case(caseFn: [](const VPReplicateRecipe *R) { |
| 479 | // Be conservative about side-effects, except for the |
| 480 | // known-side-effecting assumes and stores, which we know will be |
| 481 | // uniform. |
| 482 | return R->isSingleScalar() && |
| 483 | (!R->mayHaveSideEffects() || |
| 484 | isa<AssumeInst, StoreInst>(Val: R->getUnderlyingInstr())) && |
| 485 | all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 486 | }) |
| 487 | .Case(caseFn: [](const VPWidenRecipe *R) { |
| 488 | return preservesUniformity(Opcode: R->getOpcode()) && |
| 489 | all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 490 | }) |
| 491 | .Case(caseFn: [](const VPPhi *) { |
| 492 | // Bail out on VPPhi, as we can end up in infinite cycles. |
| 493 | return false; |
| 494 | }) |
| 495 | .Case(caseFn: [](const VPInstruction *VPI) { |
| 496 | return (VPI->isSingleScalar() || VPI->isVectorToScalar() || |
| 497 | preservesUniformity(Opcode: VPI->getOpcode())) && |
| 498 | all_of(Range: VPI->operands(), P: isUniformAcrossVFsAndUFs); |
| 499 | }) |
| 500 | .Case(caseFn: [](const VPWidenCastRecipe *R) { |
| 501 | // A cast is uniform according to its operand. |
| 502 | return isUniformAcrossVFsAndUFs(V: R->getOperand(N: 0)); |
| 503 | }) |
| 504 | .Default(defaultFn: [](const VPRecipeBase *) { // A value is considered non-uniform |
| 505 | // unless proven otherwise. |
| 506 | return false; |
| 507 | }); |
| 508 | } |
| 509 | |
| 510 | VPBasicBlock *vputils::(VPlan &Plan, VPDominatorTree &VPDT) { |
| 511 | auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry()); |
| 512 | auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) { |
| 513 | return VPBlockUtils::isHeader(VPB, VPDT); |
| 514 | }); |
| 515 | return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I); |
| 516 | } |
| 517 | |
| 518 | unsigned vputils::getVFScaleFactor(VPRecipeBase *R) { |
| 519 | if (!R) |
| 520 | return 1; |
| 521 | if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R)) |
| 522 | return RR->getVFScaleFactor(); |
| 523 | if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R)) |
| 524 | return RR->getVFScaleFactor(); |
| 525 | if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R)) |
| 526 | return ER->getVFScaleFactor(); |
| 527 | assert( |
| 528 | (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != |
| 529 | VPInstruction::ReductionStartVector) && |
| 530 | "getting scaling factor of reduction-start-vector not implemented yet" ); |
| 531 | return 1; |
| 532 | } |
| 533 | |
| 534 | bool vputils::cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking) { |
| 535 | // Assumes don't alias anything or throw; as long as they're guaranteed to |
| 536 | // execute, they're safe to hoist. They should however not be sunk, as it |
| 537 | // would destroy information. |
| 538 | if (match(V: &R, P: m_Intrinsic<Intrinsic::assume>())) |
| 539 | return Sinking; |
| 540 | if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi()) |
| 541 | return true; |
| 542 | // Allocas cannot be hoisted. |
| 543 | auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R); |
| 544 | return RepR && RepR->getOpcode() == Instruction::Alloca; |
| 545 | } |
| 546 | |
| 547 | VPSingleDefRecipe *vputils::(VPlan &Plan) { |
| 548 | if (VPValue *AliasMask = findIncomingAliasMask(Plan)) { |
| 549 | assert(match(AliasMask->getSingleUser(), |
| 550 | m_c_BinaryAnd(m_VPValue(), m_Specific(AliasMask))) && |
| 551 | "AliasMask must only be used with the original header mask" ); |
| 552 | return cast<VPSingleDefRecipe>(Val: AliasMask->getSingleUser()); |
| 553 | } |
| 554 | |
| 555 | VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); |
| 556 | SmallVector<VPValue *> WideCanonicalIVs; |
| 557 | auto *WideCanonicalIV = |
| 558 | findUserOf<VPWidenCanonicalIVRecipe>(V: LoopRegion->getCanonicalIV()); |
| 559 | assert(count_if(LoopRegion->getCanonicalIV()->users(), |
| 560 | IsaPred<VPWidenCanonicalIVRecipe>) <= 1 && |
| 561 | "Must have at most one VPWideCanonicalIVRecipe" ); |
| 562 | if (WideCanonicalIV) |
| 563 | WideCanonicalIVs.push_back(Elt: WideCanonicalIV); |
| 564 | |
| 565 | // Also include VPWidenIntOrFpInductionRecipes that represent a widened |
| 566 | // version of the canonical induction. |
| 567 | VPBasicBlock * = LoopRegion->getEntryBasicBlock(); |
| 568 | for (VPRecipeBase &Phi : HeaderVPBB->phis()) { |
| 569 | VPWidenIntOrFpInductionRecipe *WidenIV; |
| 570 | if (match(V: &Phi, P: m_CanonicalWidenIV(V&: WidenIV))) |
| 571 | WideCanonicalIVs.push_back(Elt: WidenIV); |
| 572 | } |
| 573 | |
| 574 | // Walk users of wide canonical IVs and find the single compare of the form |
| 575 | // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). |
| 576 | VPSingleDefRecipe * = nullptr; |
| 577 | for (auto *Wide : WideCanonicalIVs) { |
| 578 | for (VPUser *U : Wide->users()) { |
| 579 | auto *VPI = dyn_cast<VPInstruction>(Val: U); |
| 580 | if (!VPI || !vputils::isHeaderMask(V: VPI, Plan)) |
| 581 | continue; |
| 582 | |
| 583 | assert(VPI->getOperand(0) == Wide && |
| 584 | "WidenCanonicalIV must be the first operand of the compare" ); |
| 585 | assert(!HeaderMask && "Multiple header masks found?" ); |
| 586 | HeaderMask = VPI; |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | for (VPRecipeBase &R : LoopRegion->getEntryBasicBlock()->phis()) { |
| 591 | auto *Def = cast<VPSingleDefRecipe>(Val: &R); |
| 592 | if (vputils::isHeaderMask(V: Def, Plan)) { |
| 593 | assert(!HeaderMask && "Multiple header masks found?" ); |
| 594 | HeaderMask = Def; |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | return HeaderMask; |
| 599 | } |
| 600 | |
| 601 | SmallVector<VPBasicBlock *> |
| 602 | VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, |
| 603 | VPBasicBlock *LastBB) { |
| 604 | assert(FirstBB->getParent() == LastBB->getParent() && |
| 605 | "FirstBB and LastBB from different regions" ); |
| 606 | #ifndef NDEBUG |
| 607 | bool InSingleSuccChain = false; |
| 608 | for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor()) |
| 609 | InSingleSuccChain |= (Succ == LastBB); |
| 610 | assert(InSingleSuccChain && |
| 611 | "LastBB unreachable from FirstBB in single-successor chain" ); |
| 612 | #endif |
| 613 | auto Blocks = to_vector( |
| 614 | Range: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: vp_depth_first_deep(G: FirstBB))); |
| 615 | auto *LastIt = find(Range&: Blocks, Val: LastBB); |
| 616 | assert(LastIt != Blocks.end() && |
| 617 | "LastBB unreachable from FirstBB in depth-first traversal" ); |
| 618 | Blocks.erase(CS: std::next(x: LastIt), CE: Blocks.end()); |
| 619 | return Blocks; |
| 620 | } |
| 621 | |
| 622 | VPValue *vputils::findIncomingAliasMask(const VPlan &Plan) { |
| 623 | for (VPRecipeBase &R : *Plan.getVectorPreheader()) |
| 624 | if (match(V: &R, P: m_VPInstruction<VPInstruction::IncomingAliasMask>())) |
| 625 | return cast<VPInstruction>(Val: &R); |
| 626 | return nullptr; |
| 627 | } |
| 628 | |
| 629 | bool VPBlockUtils::(const VPBlockBase *VPB, |
| 630 | const VPDominatorTree &VPDT) { |
| 631 | auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB); |
| 632 | if (!VPBB) |
| 633 | return false; |
| 634 | |
| 635 | // If VPBB is in a region R, VPBB is a loop header if R is a loop region with |
| 636 | // VPBB as its entry, i.e., free of predecessors. |
| 637 | if (auto *R = VPBB->getParent()) |
| 638 | return !R->isReplicator() && !VPBB->hasPredecessors(); |
| 639 | |
| 640 | // A header dominates its second predecessor (the latch), with the other |
| 641 | // predecessor being the preheader |
| 642 | return VPB->getPredecessors().size() == 2 && |
| 643 | VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]); |
| 644 | } |
| 645 | |
| 646 | bool VPBlockUtils::isLatch(const VPBlockBase *VPB, |
| 647 | const VPDominatorTree &VPDT) { |
| 648 | // A latch has a header as its last successor, with its other successors |
| 649 | // leaving the loop. A preheader OTOH has a header as its first (and only) |
| 650 | // successor. |
| 651 | return VPB->getNumSuccessors() >= 2 && |
| 652 | VPBlockUtils::isHeader(VPB: VPB->getSuccessors().back(), VPDT); |
| 653 | } |
| 654 | |
| 655 | std::pair<VPBasicBlock *, VPBasicBlock *> |
| 656 | VPBlockUtils::getPlainCFGHeaderAndLatch(const VPlan &Plan) { |
| 657 | auto * = cast<VPBasicBlock>( |
| 658 | Val: Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor()); |
| 659 | auto *Latch = cast<VPBasicBlock>(Val: Header->getPredecessors()[1]); |
| 660 | return {Header, Latch}; |
| 661 | } |
| 662 | |
| 663 | VPBasicBlock *VPBlockUtils::getPlainCFGMiddleBlock(const VPlan &Plan) { |
| 664 | return cast<VPBasicBlock>(Val: Plan.getScalarPreheader()->getPredecessors()[0]); |
| 665 | } |
| 666 | |
| 667 | std::optional<MemoryLocation> |
| 668 | vputils::getMemoryLocation(const VPRecipeBase &R) { |
| 669 | auto *M = dyn_cast<VPIRMetadata>(Val: &R); |
| 670 | if (!M) |
| 671 | return std::nullopt; |
| 672 | MemoryLocation Loc; |
| 673 | // Populate noalias metadata from VPIRMetadata. |
| 674 | if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias)) |
| 675 | Loc.AATags.NoAlias = NoAliasMD; |
| 676 | if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope)) |
| 677 | Loc.AATags.Scope = AliasScopeMD; |
| 678 | return Loc; |
| 679 | } |
| 680 | |
| 681 | VPInstruction *vputils::findCanonicalIVIncrement(VPlan &Plan) { |
| 682 | VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); |
| 683 | VPRegionValue *CanIV = LoopRegion->getCanonicalIV(); |
| 684 | assert(CanIV && "Expected loop region to have a canonical IV" ); |
| 685 | |
| 686 | VPSymbolicValue &VFxUF = Plan.getVFxUF(); |
| 687 | |
| 688 | // Check if \p Step matches the expected increment step, accounting for |
| 689 | // materialization of VFxUF and UF. |
| 690 | auto IsIncrementStep = [&](VPValue *Step) -> bool { |
| 691 | if (!VFxUF.isMaterialized()) |
| 692 | return Step == &VFxUF; |
| 693 | |
| 694 | VPSymbolicValue &UF = Plan.getUF(); |
| 695 | if (!UF.isMaterialized()) |
| 696 | return Step == &UF || |
| 697 | match(V: Step, P: m_c_Mul(Op0: m_Specific(VPV: &Plan.getUF()), |
| 698 | Op1: m_VPInstruction<VPInstruction::VScale>())); |
| 699 | |
| 700 | // Alias masking: step is number of active lanes of a dependence mask. |
| 701 | if (match(V: Step, P: m_ZExtOrTruncOrSelf( |
| 702 | Op0: m_VPInstruction<VPInstruction::NumActiveLanes>()))) |
| 703 | return true; |
| 704 | |
| 705 | unsigned ConcreteUF = Plan.getConcreteUF(); |
| 706 | // Fixed VF: step is just the concrete UF. |
| 707 | if (match(V: Step, P: m_SpecificInt(V: ConcreteUF))) |
| 708 | return true; |
| 709 | |
| 710 | // Scalable VF: step involves VScale. |
| 711 | if (ConcreteUF == 1) |
| 712 | return match(V: Step, P: m_VPInstruction<VPInstruction::VScale>()); |
| 713 | if (match(V: Step, P: m_c_Mul(Op0: m_SpecificInt(V: ConcreteUF), |
| 714 | Op1: m_VPInstruction<VPInstruction::VScale>()))) |
| 715 | return true; |
| 716 | // mul(VScale, ConcreteUF) may have been simplified to |
| 717 | // shl(VScale, log2(ConcreteUF)) when ConcreteUF is a power of 2. |
| 718 | return isPowerOf2_32(Value: ConcreteUF) && |
| 719 | match(V: Step, P: m_Shl(Op0: m_VPInstruction<VPInstruction::VScale>(), |
| 720 | Op1: m_SpecificInt(V: Log2_32(Value: ConcreteUF)))); |
| 721 | }; |
| 722 | |
| 723 | VPInstruction *Increment = nullptr; |
| 724 | for (VPUser *U : CanIV->users()) { |
| 725 | VPValue *Step; |
| 726 | if (isa<VPInstruction>(Val: U) && |
| 727 | match(U, P: m_c_Add(Op0: m_Specific(VPV: CanIV), Op1: m_VPValue(V&: Step))) && |
| 728 | IsIncrementStep(Step)) { |
| 729 | assert(!Increment && "There must be a unique increment" ); |
| 730 | Increment = cast<VPInstruction>(Val: U); |
| 731 | } |
| 732 | } |
| 733 | |
| 734 | assert((!VFxUF.isMaterialized() || Increment) && |
| 735 | "After materializing VFxUF, an increment must exist" ); |
| 736 | assert((!Increment || |
| 737 | LoopRegion->hasCanonicalIVNUW() == Increment->hasNoUnsignedWrap()) && |
| 738 | "NUW flag in region and increment must match" ); |
| 739 | return Increment; |
| 740 | } |
| 741 | |
| 742 | /// Find the ComputeReductionResult recipe for \p PhiR, looking through selects |
| 743 | /// inserted for predicated reductions or tail folding. |
| 744 | VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) { |
| 745 | VPValue *BackedgeVal = PhiR->getBackedgeValue(); |
| 746 | if (auto *Res = |
| 747 | findUserOf<VPInstruction::ComputeReductionResult>(V: BackedgeVal)) |
| 748 | return Res; |
| 749 | |
| 750 | // Look through selects inserted for tail folding or predicated reductions. |
| 751 | VPRecipeBase *SelR = |
| 752 | findUserOf(V: BackedgeVal, P: m_Select(Op0: m_VPValue(), Op1: m_VPValue(), Op2: m_VPValue())); |
| 753 | if (!SelR) |
| 754 | return nullptr; |
| 755 | return findUserOf<VPInstruction::ComputeReductionResult>( |
| 756 | V: cast<VPSingleDefRecipe>(Val: SelR)); |
| 757 | } |
| 758 | |
| 759 | bool vputils::isUsedByLoadStoreAddress(const VPValue *V) { |
| 760 | SmallPtrSet<const VPValue *, 4> Seen; |
| 761 | SmallVector<const VPValue *> WorkList = {V}; |
| 762 | |
| 763 | while (!WorkList.empty()) { |
| 764 | const VPValue *Cur = WorkList.pop_back_val(); |
| 765 | if (!Seen.insert(Ptr: Cur).second) |
| 766 | continue; |
| 767 | |
| 768 | auto *Blend = dyn_cast<VPBlendRecipe>(Val: Cur); |
| 769 | // Skip blends that use V only through a compare by checking if any incoming |
| 770 | // value was already visited. |
| 771 | if (Blend && none_of(Range: seq<unsigned>(Begin: 0, End: Blend->getNumIncomingValues()), |
| 772 | P: [&](unsigned I) { |
| 773 | return Seen.contains(Ptr: Blend->getIncomingValue(Idx: I)); |
| 774 | })) |
| 775 | continue; |
| 776 | |
| 777 | for (VPUser *U : Cur->users()) { |
| 778 | if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(Val: U)) |
| 779 | if (InterleaveR->getAddr() == Cur) |
| 780 | return true; |
| 781 | // Cur is used as the pointer of a (possibly masked) load (operand 0) or |
| 782 | // store (operand 1). |
| 783 | if (match(U, P: m_CombineOr(Ps: m_Unary<Instruction::Load>(Op0: m_Specific(VPV: Cur)), |
| 784 | Ps: m_Binary<Instruction::Store>(Op0: m_VPValue(), |
| 785 | Op1: m_Specific(VPV: Cur))))) |
| 786 | return true; |
| 787 | if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: cast<VPRecipeBase>(Val: U))) { |
| 788 | if (MemR->getAddr() == Cur && MemR->isConsecutive()) |
| 789 | return true; |
| 790 | } |
| 791 | } |
| 792 | |
| 793 | // The legacy cost model only supports scalarization loads/stores with phi |
| 794 | // addresses, if the phi is directly used as load/store address. Don't |
| 795 | // traverse further for Blends. |
| 796 | if (Blend) |
| 797 | continue; |
| 798 | |
| 799 | // Only traverse further through users that also define a value (and can |
| 800 | // thus have their own users walked). Skip when Cur is only used as mask , |
| 801 | // as well as loads: a loaded value does not depend on the load's operand. |
| 802 | for (VPUser *U : Cur->users()) { |
| 803 | auto *VPI = dyn_cast<VPInstruction>(Val: U); |
| 804 | if (VPI && VPI->getMask() == Cur && |
| 805 | none_of(Range: VPI->operandsWithoutMask(), |
| 806 | P: [Cur](VPValue *Op) { return Op == Cur; })) |
| 807 | continue; |
| 808 | if (match(U, P: m_VPInstruction<Instruction::Load>())) |
| 809 | continue; |
| 810 | if (auto *SDR = dyn_cast<VPSingleDefRecipe>(Val: U)) |
| 811 | WorkList.push_back(Elt: SDR); |
| 812 | } |
| 813 | } |
| 814 | return false; |
| 815 | } |
| 816 | |
| 817 | /// Try to find a loop-invariant IR value for \p S in the plan's entry block |
| 818 | /// that can be reused. Returns the corresponding live-in VPValue, or nullptr |
| 819 | /// if no reusable IR value is found. |
| 820 | VPValue *VPSCEVExpander::tryToReuseIRValue(const SCEV *S) { |
| 821 | if (isa<SCEVConstant, SCEVUnknown>(Val: S)) |
| 822 | return nullptr; |
| 823 | VPlan &Plan = Builder.getPlan(); |
| 824 | BasicBlock *PH = cast<VPIRBasicBlock>(Val: Plan.getEntry())->getIRBasicBlock(); |
| 825 | for (Value *V : SE.getSCEVValues(S)) { |
| 826 | // Only reuse instructions in the plan's entry block, or, when a |
| 827 | // DominatorTree is available, any instruction that dominates it. |
| 828 | // Instructions in sibling branches may not dominate the entry block. |
| 829 | auto *I = dyn_cast<Instruction>(Val: V); |
| 830 | if (!I) |
| 831 | return Plan.getOrAddLiveIn(V); |
| 832 | if (!SE.DT.dominates(A: I->getParent(), B: PH)) |
| 833 | continue; |
| 834 | SmallVector<Instruction *> DropPoisonGeneratingInsts; |
| 835 | if (!SE.canReuseInstruction(S, I, DropPoisonGeneratingInsts)) |
| 836 | continue; |
| 837 | for (Instruction *DropI : DropPoisonGeneratingInsts) |
| 838 | SCEVExpander::dropPoisonGeneratingAnnotationsAndReinfer(SE, I: DropI); |
| 839 | return Plan.getOrAddLiveIn(V); |
| 840 | } |
| 841 | return nullptr; |
| 842 | } |
| 843 | |
| 844 | VPValue *VPSCEVExpander::tryToExpand(const SCEV *S) { |
| 845 | if (VPValue *V = tryToReuseIRValue(S)) |
| 846 | return V; |
| 847 | |
| 848 | switch (S->getSCEVType()) { |
| 849 | case scConstant: |
| 850 | return Builder.getPlan().getOrAddLiveIn(V: cast<SCEVConstant>(Val: S)->getValue()); |
| 851 | case scUnknown: |
| 852 | return Builder.getPlan().getOrAddLiveIn(V: cast<SCEVUnknown>(Val: S)->getValue()); |
| 853 | case scVScale: |
| 854 | return Builder.createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: S->getType()); |
| 855 | case scAddExpr: |
| 856 | case scMulExpr: { |
| 857 | if (S->getType()->isPointerTy()) |
| 858 | return nullptr; |
| 859 | auto *NAry = cast<SCEVNAryExpr>(Val: S); |
| 860 | unsigned Opcode = |
| 861 | S->getSCEVType() == scAddExpr ? Instruction::Add : Instruction::Mul; |
| 862 | // Iterate in reverse so that constants are emitted last. |
| 863 | SmallVector<VPValue *, 2> Ops; |
| 864 | for (const SCEVUse &Op : reverse(C: NAry->operands())) { |
| 865 | VPValue *OpV = tryToExpand(S: Op); |
| 866 | if (!OpV) |
| 867 | return nullptr; |
| 868 | Ops.push_back(Elt: OpV); |
| 869 | } |
| 870 | VPIRFlags::WrapFlagsTy WrapFlags(NAry->hasNoUnsignedWrap(), |
| 871 | NAry->hasNoSignedWrap()); |
| 872 | VPValue *Result = Ops.front(); |
| 873 | for (VPValue *Op : drop_begin(RangeOrContainer&: Ops)) |
| 874 | Result = Builder.createOverflowingOp(Opcode, Operands: {Result, Op}, WrapFlags, DL); |
| 875 | return Result; |
| 876 | } |
| 877 | case scUDivExpr: { |
| 878 | auto *UDiv = cast<SCEVUDivExpr>(Val: S); |
| 879 | VPValue *LHS = tryToExpand(S: UDiv->getLHS()); |
| 880 | if (!LHS) |
| 881 | return nullptr; |
| 882 | VPValue *RHS = tryToExpand(S: UDiv->getRHS()); |
| 883 | if (!RHS) |
| 884 | return nullptr; |
| 885 | return Builder.createNaryOp(Opcode: Instruction::UDiv, Operands: {LHS, RHS}, |
| 886 | Flags: VPIRFlags::getDefaultFlags(Opcode: Instruction::UDiv), |
| 887 | DL); |
| 888 | } |
| 889 | default: |
| 890 | return nullptr; |
| 891 | } |
| 892 | } |
| 893 | |