| 1 | //===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "VPlanUtils.h" |
| 10 | #include "VPlanAnalysis.h" |
| 11 | #include "VPlanCFG.h" |
| 12 | #include "VPlanDominatorTree.h" |
| 13 | #include "VPlanPatternMatch.h" |
| 14 | #include "llvm/ADT/TypeSwitch.h" |
| 15 | #include "llvm/Analysis/MemoryLocation.h" |
| 16 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 17 | #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | using namespace llvm::VPlanPatternMatch; |
| 21 | using namespace llvm::SCEVPatternMatch; |
| 22 | |
| 23 | bool vputils::onlyFirstLaneUsed(const VPValue *Def) { |
| 24 | return all_of(Range: Def->users(), |
| 25 | P: [Def](const VPUser *U) { return U->usesFirstLaneOnly(Op: Def); }); |
| 26 | } |
| 27 | |
| 28 | bool vputils::onlyFirstPartUsed(const VPValue *Def) { |
| 29 | return all_of(Range: Def->users(), |
| 30 | P: [Def](const VPUser *U) { return U->usesFirstPartOnly(Op: Def); }); |
| 31 | } |
| 32 | |
| 33 | bool vputils::onlyScalarValuesUsed(const VPValue *Def) { |
| 34 | return all_of(Range: Def->users(), |
| 35 | P: [Def](const VPUser *U) { return U->usesScalars(Op: Def); }); |
| 36 | } |
| 37 | |
| 38 | VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { |
| 39 | if (auto *E = dyn_cast<SCEVConstant>(Val: Expr)) |
| 40 | return Plan.getOrAddLiveIn(V: E->getValue()); |
| 41 | // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction |
| 42 | // value. Otherwise the value may be defined in a loop and using it directly |
| 43 | // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA |
| 44 | // form. |
| 45 | auto *U = dyn_cast<SCEVUnknown>(Val: Expr); |
| 46 | if (U && !isa<Instruction>(Val: U->getValue())) |
| 47 | return Plan.getOrAddLiveIn(V: U->getValue()); |
| 48 | auto *Expanded = new VPExpandSCEVRecipe(Expr); |
| 49 | Plan.getEntry()->appendRecipe(Recipe: Expanded); |
| 50 | return Expanded; |
| 51 | } |
| 52 | |
| 53 | bool vputils::(const VPValue *V, const VPlan &Plan) { |
| 54 | if (isa<VPActiveLaneMaskPHIRecipe>(Val: V)) |
| 55 | return true; |
| 56 | |
| 57 | auto IsWideCanonicalIV = [](VPValue *A) { |
| 58 | return isa<VPWidenCanonicalIVRecipe>(Val: A) || |
| 59 | (isa<VPWidenIntOrFpInductionRecipe>(Val: A) && |
| 60 | cast<VPWidenIntOrFpInductionRecipe>(Val: A)->isCanonical()); |
| 61 | }; |
| 62 | |
| 63 | VPValue *A, *B; |
| 64 | |
| 65 | auto m_CanonicalScalarIVSteps = m_ScalarIVSteps( |
| 66 | Op0: m_CombineOr(L: m_CanonicalIV(), |
| 67 | R: m_DerivedIV(Op0: m_ZeroInt(), Op1: m_CanonicalIV(), Op2: m_One())), |
| 68 | Op1: m_One(), Op2: m_Specific(VPV: &Plan.getVF())); |
| 69 | |
| 70 | if (match(V, P: m_ActiveLaneMask(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B), Op2: m_One()))) |
| 71 | return B == Plan.getTripCount() && |
| 72 | (match(V: A, P: m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A)); |
| 73 | |
| 74 | // For scalar plans, the header mask uses the scalar steps. |
| 75 | if (match(V, P: m_ICmp(Op0: m_CanonicalScalarIVSteps, |
| 76 | Op1: m_Specific(VPV: Plan.getBackedgeTakenCount())))) { |
| 77 | assert(Plan.hasScalarVFOnly() && |
| 78 | "Non-scalar VF using scalar IV steps for header mask?" ); |
| 79 | return true; |
| 80 | } |
| 81 | |
| 82 | return match(V, P: m_ICmp(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B))) && IsWideCanonicalIV(A) && |
| 83 | B == Plan.getBackedgeTakenCount(); |
| 84 | } |
| 85 | |
| 86 | /// Returns true if \p R propagates poison from any operand to its result. |
| 87 | static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) { |
| 88 | return TypeSwitch<const VPRecipeBase *, bool>(R) |
| 89 | .Case<VPWidenGEPRecipe, VPWidenCastRecipe>( |
| 90 | caseFn: [](const VPRecipeBase *) { return true; }) |
| 91 | .Case(caseFn: [](const VPReplicateRecipe *Rep) { |
| 92 | // GEP and casts propagate poison from all operands. |
| 93 | unsigned Opcode = Rep->getOpcode(); |
| 94 | return Opcode == Instruction::GetElementPtr || |
| 95 | Instruction::isCast(Opcode); |
| 96 | }) |
| 97 | .Default(defaultFn: [](const VPRecipeBase *) { return false; }); |
| 98 | } |
| 99 | |
| 100 | /// Returns true if \p V being poison is guaranteed to trigger UB because it |
| 101 | /// propagates to the address of a memory recipe. |
| 102 | static bool poisonGuaranteesUB(const VPValue *V) { |
| 103 | SmallPtrSet<const VPValue *, 8> Visited; |
| 104 | SmallVector<const VPValue *, 16> Worklist; |
| 105 | |
| 106 | Worklist.push_back(Elt: V); |
| 107 | |
| 108 | while (!Worklist.empty()) { |
| 109 | const VPValue *Current = Worklist.pop_back_val(); |
| 110 | if (!Visited.insert(Ptr: Current).second) |
| 111 | continue; |
| 112 | |
| 113 | for (VPUser *U : Current->users()) { |
| 114 | // Check if Current is used as an address operand for load/store. |
| 115 | if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: U)) { |
| 116 | if (MemR->getAddr() == Current) |
| 117 | return true; |
| 118 | continue; |
| 119 | } |
| 120 | if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: U)) { |
| 121 | unsigned Opcode = Rep->getOpcode(); |
| 122 | if ((Opcode == Instruction::Load && Rep->getOperand(N: 0) == Current) || |
| 123 | (Opcode == Instruction::Store && Rep->getOperand(N: 1) == Current)) |
| 124 | return true; |
| 125 | } |
| 126 | |
| 127 | // Check if poison propagates through this recipe to any of its users. |
| 128 | auto *R = cast<VPRecipeBase>(Val: U); |
| 129 | for (const VPValue *Op : R->operands()) { |
| 130 | if (Op == Current && propagatesPoisonFromRecipeOp(R)) { |
| 131 | Worklist.push_back(Elt: R->getVPSingleValue()); |
| 132 | break; |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | return false; |
| 139 | } |
| 140 | |
| 141 | const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, |
| 142 | PredicatedScalarEvolution &PSE, |
| 143 | const Loop *L) { |
| 144 | ScalarEvolution &SE = *PSE.getSE(); |
| 145 | if (isa<VPIRValue, VPSymbolicValue>(Val: V)) { |
| 146 | Value *LiveIn = V->getUnderlyingValue(); |
| 147 | if (LiveIn && SE.isSCEVable(Ty: LiveIn->getType())) |
| 148 | return SE.getSCEV(V: LiveIn); |
| 149 | return SE.getCouldNotCompute(); |
| 150 | } |
| 151 | |
| 152 | // Helper to create SCEVs for binary and unary operations. |
| 153 | auto CreateSCEV = [&](ArrayRef<VPValue *> Ops, |
| 154 | function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn) |
| 155 | -> const SCEV * { |
| 156 | SmallVector<SCEVUse, 2> SCEVOps; |
| 157 | for (VPValue *Op : Ops) { |
| 158 | const SCEV *S = getSCEVExprForVPValue(V: Op, PSE, L); |
| 159 | if (isa<SCEVCouldNotCompute>(Val: S)) |
| 160 | return SE.getCouldNotCompute(); |
| 161 | SCEVOps.push_back(Elt: S); |
| 162 | } |
| 163 | return CreateFn(SCEVOps); |
| 164 | }; |
| 165 | |
| 166 | VPValue *LHSVal, *RHSVal; |
| 167 | if (match(V, P: m_Add(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 168 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 169 | return SE.getAddExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 170 | }); |
| 171 | if (match(V, P: m_Sub(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 172 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 173 | return SE.getMinusSCEV(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 174 | }); |
| 175 | if (match(V, P: m_Not(Op0: m_VPValue(V&: LHSVal)))) { |
| 176 | // not X = xor X, -1 = -1 - X |
| 177 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 178 | return SE.getMinusSCEV(LHS: SE.getMinusOne(Ty: Ops[0]->getType()), RHS: Ops[0]); |
| 179 | }); |
| 180 | } |
| 181 | if (match(V, P: m_Mul(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 182 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 183 | return SE.getMulExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0); |
| 184 | }); |
| 185 | if (match(V, |
| 186 | P: m_Binary<Instruction::UDiv>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 187 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 188 | return SE.getUDivExpr(LHS: Ops[0], RHS: Ops[1]); |
| 189 | }); |
| 190 | // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n. |
| 191 | const APInt *Mask; |
| 192 | if (match(V, P: m_c_BinaryAnd(Op0: m_VPValue(V&: LHSVal), Op1: m_APInt(C&: Mask))) && |
| 193 | (*Mask + 1).isPowerOf2()) |
| 194 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 195 | return SE.getURemExpr(LHS: Ops[0], RHS: SE.getConstant(Val: *Mask + 1)); |
| 196 | }); |
| 197 | if (match(V, P: m_Trunc(Op0: m_VPValue(V&: LHSVal)))) { |
| 198 | const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan(); |
| 199 | Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V); |
| 200 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 201 | return SE.getTruncateExpr(Op: Ops[0], Ty: DestTy); |
| 202 | }); |
| 203 | } |
| 204 | if (match(V, P: m_ZExt(Op0: m_VPValue(V&: LHSVal)))) { |
| 205 | const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan(); |
| 206 | Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V); |
| 207 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 208 | return SE.getZeroExtendExpr(Op: Ops[0], Ty: DestTy); |
| 209 | }); |
| 210 | } |
| 211 | if (match(V, P: m_SExt(Op0: m_VPValue(V&: LHSVal)))) { |
| 212 | const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan(); |
| 213 | Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V); |
| 214 | |
| 215 | // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension |
| 216 | // onto the operands before computing the subtraction. |
| 217 | VPValue *SubLHS, *SubRHS; |
| 218 | auto *SubR = dyn_cast<VPRecipeWithIRFlags>(Val: LHSVal); |
| 219 | if (match(V: LHSVal, P: m_Sub(Op0: m_VPValue(V&: SubLHS), Op1: m_VPValue(V&: SubRHS))) && SubR && |
| 220 | SubR->hasNoSignedWrap() && poisonGuaranteesUB(V: LHSVal)) { |
| 221 | const SCEV *V1 = getSCEVExprForVPValue(V: SubLHS, PSE, L); |
| 222 | const SCEV *V2 = getSCEVExprForVPValue(V: SubRHS, PSE, L); |
| 223 | if (!isa<SCEVCouldNotCompute>(Val: V1) && !isa<SCEVCouldNotCompute>(Val: V2)) |
| 224 | return SE.getMinusSCEV(LHS: SE.getSignExtendExpr(Op: V1, Ty: DestTy), |
| 225 | RHS: SE.getSignExtendExpr(Op: V2, Ty: DestTy), Flags: SCEV::FlagNSW); |
| 226 | } |
| 227 | |
| 228 | return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 229 | return SE.getSignExtendExpr(Op: Ops[0], Ty: DestTy); |
| 230 | }); |
| 231 | } |
| 232 | if (match(V, |
| 233 | P: m_Intrinsic<Intrinsic::umax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 234 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 235 | return SE.getUMaxExpr(LHS: Ops[0], RHS: Ops[1]); |
| 236 | }); |
| 237 | if (match(V, |
| 238 | P: m_Intrinsic<Intrinsic::smax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 239 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 240 | return SE.getSMaxExpr(LHS: Ops[0], RHS: Ops[1]); |
| 241 | }); |
| 242 | if (match(V, |
| 243 | P: m_Intrinsic<Intrinsic::umin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 244 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 245 | return SE.getUMinExpr(LHS: Ops[0], RHS: Ops[1]); |
| 246 | }); |
| 247 | if (match(V, |
| 248 | P: m_Intrinsic<Intrinsic::smin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) |
| 249 | return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) { |
| 250 | return SE.getSMinExpr(LHS: Ops[0], RHS: Ops[1]); |
| 251 | }); |
| 252 | |
| 253 | ArrayRef<VPValue *> Ops; |
| 254 | Type *SourceElementType; |
| 255 | if (match(V, P: m_GetElementPtr(SourceElementType, Operands&: Ops))) { |
| 256 | const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) { |
| 257 | return SE.getGEPExpr(BaseExpr: Ops.front(), IndexExprs: Ops.drop_front(), SrcElementTy: SourceElementType); |
| 258 | }); |
| 259 | return PSE.getPredicatedSCEV(Expr: GEPExpr); |
| 260 | } |
| 261 | |
| 262 | // TODO: Support constructing SCEVs for more recipes as needed. |
| 263 | const VPRecipeBase *DefR = V->getDefiningRecipe(); |
| 264 | const SCEV *Expr = |
| 265 | TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR) |
| 266 | .Case(caseFn: [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) |
| 267 | .Case(caseFn: [&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) { |
| 268 | if (!L) |
| 269 | return SE.getCouldNotCompute(); |
| 270 | const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L); |
| 271 | return SE.getAddRecExpr(Start, Step: SE.getOne(Ty: Start->getType()), L, |
| 272 | Flags: SCEV::FlagAnyWrap); |
| 273 | }) |
| 274 | .Case(caseFn: [&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) { |
| 275 | const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L); |
| 276 | if (!L || isa<SCEVCouldNotCompute>(Val: Step)) |
| 277 | return SE.getCouldNotCompute(); |
| 278 | const SCEV *Start = |
| 279 | getSCEVExprForVPValue(V: R->getStartValue(), PSE, L); |
| 280 | const SCEV *AddRec = |
| 281 | SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap); |
| 282 | if (R->getTruncInst()) |
| 283 | return SE.getTruncateExpr(Op: AddRec, Ty: R->getScalarType()); |
| 284 | return AddRec; |
| 285 | }) |
| 286 | .Case(caseFn: [&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) { |
| 287 | const SCEV *Start = |
| 288 | getSCEVExprForVPValue(V: R->getStartValue(), PSE, L); |
| 289 | if (!L || isa<SCEVCouldNotCompute>(Val: Start)) |
| 290 | return SE.getCouldNotCompute(); |
| 291 | const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L); |
| 292 | if (isa<SCEVCouldNotCompute>(Val: Step)) |
| 293 | return SE.getCouldNotCompute(); |
| 294 | return SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap); |
| 295 | }) |
| 296 | .Case(caseFn: [&SE, &PSE, L](const VPDerivedIVRecipe *R) { |
| 297 | const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L); |
| 298 | const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L); |
| 299 | const SCEV *Scale = getSCEVExprForVPValue(V: R->getOperand(N: 2), PSE, L); |
| 300 | if (any_of(Range: ArrayRef({Start, IV, Scale}), |
| 301 | P: IsaPred<SCEVCouldNotCompute>)) |
| 302 | return SE.getCouldNotCompute(); |
| 303 | |
| 304 | return SE.getAddExpr( |
| 305 | LHS: SE.getTruncateOrSignExtend(V: Start, Ty: IV->getType()), |
| 306 | RHS: SE.getMulExpr( |
| 307 | LHS: IV, RHS: SE.getTruncateOrSignExtend(V: Scale, Ty: IV->getType()))); |
| 308 | }) |
| 309 | .Case(caseFn: [&SE, &PSE, L](const VPScalarIVStepsRecipe *R) { |
| 310 | const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L); |
| 311 | const SCEV *Step = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L); |
| 312 | if (isa<SCEVCouldNotCompute>(Val: IV) || !isa<SCEVConstant>(Val: Step)) |
| 313 | return SE.getCouldNotCompute(); |
| 314 | return SE.getTruncateOrSignExtend(V: IV, Ty: Step->getType()); |
| 315 | }) |
| 316 | .Default( |
| 317 | defaultFn: [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); |
| 318 | |
| 319 | return PSE.getPredicatedSCEV(Expr); |
| 320 | } |
| 321 | |
| 322 | bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, |
| 323 | const Loop *L) { |
| 324 | // If address is an SCEVAddExpr, we require that all operands must be either |
| 325 | // be invariant or a (possibly sign-extend) affine AddRec. |
| 326 | if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) { |
| 327 | return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) { |
| 328 | return SE.isLoopInvariant(S: Op, L) || |
| 329 | match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) || |
| 330 | match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV())); |
| 331 | }); |
| 332 | } |
| 333 | |
| 334 | // Otherwise, check if address is loop invariant or an affine add recurrence. |
| 335 | return SE.isLoopInvariant(S: Addr, L) || |
| 336 | match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV())); |
| 337 | } |
| 338 | |
| 339 | /// Returns true if \p Opcode preserves uniformity, i.e., if all operands are |
| 340 | /// uniform, the result will also be uniform. |
| 341 | static bool preservesUniformity(unsigned Opcode) { |
| 342 | if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) |
| 343 | return true; |
| 344 | switch (Opcode) { |
| 345 | case Instruction::Freeze: |
| 346 | case Instruction::GetElementPtr: |
| 347 | case Instruction::ICmp: |
| 348 | case Instruction::FCmp: |
| 349 | case Instruction::Select: |
| 350 | case VPInstruction::Not: |
| 351 | case VPInstruction::Broadcast: |
| 352 | case VPInstruction::MaskedCond: |
| 353 | case VPInstruction::PtrAdd: |
| 354 | return true; |
| 355 | default: |
| 356 | return false; |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | bool vputils::isSingleScalar(const VPValue *VPV) { |
| 361 | // A live-in must be uniform across the scope of VPlan. |
| 362 | if (isa<VPIRValue, VPSymbolicValue>(Val: VPV)) |
| 363 | return true; |
| 364 | |
| 365 | if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: VPV)) { |
| 366 | const VPRegionBlock *RegionOfR = Rep->getRegion(); |
| 367 | // Don't consider recipes in replicate regions as uniform yet; their first |
| 368 | // lane cannot be accessed when executing the replicate region for other |
| 369 | // lanes. |
| 370 | if (RegionOfR && RegionOfR->isReplicator()) |
| 371 | return false; |
| 372 | return Rep->isSingleScalar() || (preservesUniformity(Opcode: Rep->getOpcode()) && |
| 373 | all_of(Range: Rep->operands(), P: isSingleScalar)); |
| 374 | } |
| 375 | if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(Val: VPV)) |
| 376 | return all_of(Range: VPV->getDefiningRecipe()->operands(), P: isSingleScalar); |
| 377 | if (auto *WidenR = dyn_cast<VPWidenRecipe>(Val: VPV)) { |
| 378 | return preservesUniformity(Opcode: WidenR->getOpcode()) && |
| 379 | all_of(Range: WidenR->operands(), P: isSingleScalar); |
| 380 | } |
| 381 | if (auto *VPI = dyn_cast<VPInstruction>(Val: VPV)) |
| 382 | return VPI->isSingleScalar() || VPI->isVectorToScalar() || |
| 383 | (preservesUniformity(Opcode: VPI->getOpcode()) && |
| 384 | all_of(Range: VPI->operands(), P: isSingleScalar)); |
| 385 | if (auto *RR = dyn_cast<VPReductionRecipe>(Val: VPV)) |
| 386 | return !RR->isPartialReduction(); |
| 387 | if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe, |
| 388 | VPVectorEndPointerRecipe>(Val: VPV)) |
| 389 | return true; |
| 390 | if (auto *Expr = dyn_cast<VPExpressionRecipe>(Val: VPV)) |
| 391 | return Expr->isSingleScalar(); |
| 392 | |
| 393 | // VPExpandSCEVRecipes must be placed in the entry and are always uniform. |
| 394 | return isa<VPExpandSCEVRecipe>(Val: VPV); |
| 395 | } |
| 396 | |
| 397 | bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { |
| 398 | // Live-ins are uniform. |
| 399 | if (isa<VPIRValue, VPSymbolicValue>(Val: V)) |
| 400 | return true; |
| 401 | |
| 402 | VPRecipeBase *R = V->getDefiningRecipe(); |
| 403 | VPBasicBlock *VPBB = R ? R->getParent() : nullptr; |
| 404 | VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr; |
| 405 | if (VPBB && |
| 406 | (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) { |
| 407 | if (match(V: V->getDefiningRecipe(), |
| 408 | P: m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>())) |
| 409 | return false; |
| 410 | return all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 411 | } |
| 412 | |
| 413 | if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) { |
| 414 | // Canonical IV is uniform. |
| 415 | if (V == EnclosingRegion->getCanonicalIV()) |
| 416 | return true; |
| 417 | } |
| 418 | |
| 419 | return TypeSwitch<const VPRecipeBase *, bool>(R) |
| 420 | .Case(caseFn: [](const VPDerivedIVRecipe *R) { return true; }) |
| 421 | .Case(caseFn: [](const VPReplicateRecipe *R) { |
| 422 | // Be conservative about side-effects, except for the |
| 423 | // known-side-effecting assumes and stores, which we know will be |
| 424 | // uniform. |
| 425 | return R->isSingleScalar() && |
| 426 | (!R->mayHaveSideEffects() || |
| 427 | isa<AssumeInst, StoreInst>(Val: R->getUnderlyingInstr())) && |
| 428 | all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 429 | }) |
| 430 | .Case(caseFn: [](const VPWidenRecipe *R) { |
| 431 | return preservesUniformity(Opcode: R->getOpcode()) && |
| 432 | all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs); |
| 433 | }) |
| 434 | .Case(caseFn: [](const VPInstruction *VPI) { |
| 435 | return preservesUniformity(Opcode: VPI->getOpcode()) && |
| 436 | all_of(Range: VPI->operands(), P: isUniformAcrossVFsAndUFs); |
| 437 | }) |
| 438 | .Case(caseFn: [](const VPWidenCastRecipe *R) { |
| 439 | // A cast is uniform according to its operand. |
| 440 | return isUniformAcrossVFsAndUFs(V: R->getOperand(N: 0)); |
| 441 | }) |
| 442 | .Default(defaultFn: [](const VPRecipeBase *) { // A value is considered non-uniform |
| 443 | // unless proven otherwise. |
| 444 | return false; |
| 445 | }); |
| 446 | } |
| 447 | |
| 448 | VPBasicBlock *vputils::(VPlan &Plan, VPDominatorTree &VPDT) { |
| 449 | auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry()); |
| 450 | auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) { |
| 451 | return VPBlockUtils::isHeader(VPB, VPDT); |
| 452 | }); |
| 453 | return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I); |
| 454 | } |
| 455 | |
| 456 | unsigned vputils::getVFScaleFactor(VPRecipeBase *R) { |
| 457 | if (!R) |
| 458 | return 1; |
| 459 | if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R)) |
| 460 | return RR->getVFScaleFactor(); |
| 461 | if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R)) |
| 462 | return RR->getVFScaleFactor(); |
| 463 | if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R)) |
| 464 | return ER->getVFScaleFactor(); |
| 465 | assert( |
| 466 | (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != |
| 467 | VPInstruction::ReductionStartVector) && |
| 468 | "getting scaling factor of reduction-start-vector not implemented yet" ); |
| 469 | return 1; |
| 470 | } |
| 471 | |
| 472 | std::optional<VPValue *> |
| 473 | vputils::getRecipesForUncountableExit(SmallVectorImpl<VPInstruction *> &Recipes, |
| 474 | SmallVectorImpl<VPInstruction *> &GEPs, |
| 475 | VPBasicBlock *LatchVPBB) { |
| 476 | // Given a plain CFG VPlan loop with countable latch exiting block |
| 477 | // \p LatchVPBB, we're looking to match the recipes contributing to the |
| 478 | // uncountable exit condition comparison (here, vp<%4>) back to either |
| 479 | // live-ins or the address nodes for the load used as part of the uncountable |
| 480 | // exit comparison so that we can either move them within the loop, or copy |
| 481 | // them to the preheader depending on the chosen method for dealing with |
| 482 | // stores in uncountable exit loops. |
| 483 | // |
| 484 | // Currently, the address of the load is restricted to a GEP with 2 operands |
| 485 | // and a live-in base address. This constraint may be relaxed later. |
| 486 | // |
| 487 | // VPlan ' for UF>=1' { |
| 488 | // Live-in vp<%0> = VF * UF |
| 489 | // Live-in vp<%1> = vector-trip-count |
| 490 | // Live-in ir<20> = original trip-count |
| 491 | // |
| 492 | // ir-bb<entry>: |
| 493 | // Successor(s): scalar.ph, vector.ph |
| 494 | // |
| 495 | // vector.ph: |
| 496 | // Successor(s): for.body |
| 497 | // |
| 498 | // for.body: |
| 499 | // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| 500 | // EMIT-SCALAR ir<%iv> = phi [ ir<0>, vector.ph ], [ ir<%iv.next>, for.inc ] |
| 501 | // EMIT ir<%uncountable.addr> = getelementptr inbounds nuw ir<%pred>,ir<%iv> |
| 502 | // EMIT ir<%uncountable.val> = load ir<%uncountable.addr> |
| 503 | // EMIT ir<%uncountable.cond> = icmp sgt ir<%uncountable.val>, ir<500> |
| 504 | // EMIT vp<%3> = masked-cond ir<%uncountable.cond> |
| 505 | // Successor(s): for.inc |
| 506 | // |
| 507 | // for.inc: |
| 508 | // EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1> |
| 509 | // EMIT ir<%countable.cond> = icmp eq ir<%iv.next>, ir<20> |
| 510 | // EMIT vp<%index.next> = add nuw vp<%2>, vp<%0> |
| 511 | // EMIT vp<%4> = any-of ir<%3> |
| 512 | // EMIT vp<%5> = icmp eq vp<%index.next>, vp<%1> |
| 513 | // EMIT vp<%6> = or vp<%4>, vp<%5> |
| 514 | // EMIT branch-on-cond vp<%6> |
| 515 | // Successor(s): middle.block, for.body |
| 516 | // |
| 517 | // middle.block: |
| 518 | // Successor(s): ir-bb<exit>, scalar.ph |
| 519 | // |
| 520 | // ir-bb<exit>: |
| 521 | // No successors |
| 522 | // |
| 523 | // scalar.ph: |
| 524 | // } |
| 525 | |
| 526 | // Find the uncountable loop exit condition. |
| 527 | VPValue *UncountableCondition = nullptr; |
| 528 | if (!match(V: LatchVPBB->getTerminator(), |
| 529 | P: m_BranchOnCond(Op0: m_c_BinaryOr( |
| 530 | Op0: m_AnyOf(Op0: m_VPValue(V&: UncountableCondition)), Op1: m_VPValue())))) |
| 531 | return std::nullopt; |
| 532 | |
| 533 | SmallVector<VPValue *, 4> Worklist; |
| 534 | Worklist.push_back(Elt: UncountableCondition); |
| 535 | while (!Worklist.empty()) { |
| 536 | VPValue *V = Worklist.pop_back_val(); |
| 537 | |
| 538 | // Any value defined outside the loop does not need to be copied. |
| 539 | if (V->isDefinedOutsideLoopRegions()) |
| 540 | continue; |
| 541 | |
| 542 | // FIXME: Remove the single user restriction; it's here because we're |
| 543 | // starting with the simplest set of loops we can, and multiple |
| 544 | // users means needing to add PHI nodes in the transform. |
| 545 | if (V->getNumUsers() > 1) |
| 546 | return std::nullopt; |
| 547 | |
| 548 | VPValue *Op1, *Op2; |
| 549 | // Walk back through recipes until we find at least one load from memory. |
| 550 | if (match(V, P: m_ICmp(Op0: m_VPValue(V&: Op1), Op1: m_VPValue(V&: Op2)))) { |
| 551 | Worklist.push_back(Elt: Op1); |
| 552 | Worklist.push_back(Elt: Op2); |
| 553 | Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe())); |
| 554 | } else if (match(V, P: m_VPInstruction<Instruction::Load>(Ops: m_VPValue(V&: Op1)))) { |
| 555 | VPRecipeBase *GepR = Op1->getDefiningRecipe(); |
| 556 | // Only matching base + single offset term for now. |
| 557 | if (GepR->getNumOperands() != 2) |
| 558 | return std::nullopt; |
| 559 | // Matching a GEP with a loop-invariant base ptr. |
| 560 | if (!match(V: GepR, P: m_VPInstruction<Instruction::GetElementPtr>( |
| 561 | Ops: m_LiveIn(), Ops: m_VPValue()))) |
| 562 | return std::nullopt; |
| 563 | Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe())); |
| 564 | Recipes.push_back(Elt: cast<VPInstruction>(Val: GepR)); |
| 565 | GEPs.push_back(Elt: cast<VPInstruction>(Val: GepR)); |
| 566 | } else if (match(V, P: m_VPInstruction<VPInstruction::MaskedCond>( |
| 567 | Ops: m_VPValue(V&: Op1)))) { |
| 568 | Worklist.push_back(Elt: Op1); |
| 569 | Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe())); |
| 570 | } else |
| 571 | return std::nullopt; |
| 572 | } |
| 573 | |
| 574 | // If we couldn't match anything, don't return the condition. It may be |
| 575 | // defined outside the loop. |
| 576 | if (Recipes.empty() || GEPs.empty()) |
| 577 | return std::nullopt; |
| 578 | |
| 579 | return UncountableCondition; |
| 580 | } |
| 581 | |
| 582 | VPSingleDefRecipe *vputils::(VPlan &Plan) { |
| 583 | VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); |
| 584 | SmallVector<VPValue *> WideCanonicalIVs; |
| 585 | auto *FoundWidenCanonicalIVUser = find_if( |
| 586 | Range: LoopRegion->getCanonicalIV()->users(), P: IsaPred<VPWidenCanonicalIVRecipe>); |
| 587 | assert(count_if(LoopRegion->getCanonicalIV()->users(), |
| 588 | IsaPred<VPWidenCanonicalIVRecipe>) <= 1 && |
| 589 | "Must have at most one VPWideCanonicalIVRecipe" ); |
| 590 | if (FoundWidenCanonicalIVUser != |
| 591 | LoopRegion->getCanonicalIV()->users().end()) { |
| 592 | auto *WideCanonicalIV = |
| 593 | cast<VPWidenCanonicalIVRecipe>(Val: *FoundWidenCanonicalIVUser); |
| 594 | WideCanonicalIVs.push_back(Elt: WideCanonicalIV); |
| 595 | } |
| 596 | |
| 597 | // Also include VPWidenIntOrFpInductionRecipes that represent a widened |
| 598 | // version of the canonical induction. |
| 599 | VPBasicBlock * = LoopRegion->getEntryBasicBlock(); |
| 600 | for (VPRecipeBase &Phi : HeaderVPBB->phis()) { |
| 601 | auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &Phi); |
| 602 | if (WidenOriginalIV && WidenOriginalIV->isCanonical()) |
| 603 | WideCanonicalIVs.push_back(Elt: WidenOriginalIV); |
| 604 | } |
| 605 | |
| 606 | // Walk users of wide canonical IVs and find the single compare of the form |
| 607 | // (ICMP_ULE, WideCanonicalIV, backedge-taken-count). |
| 608 | VPSingleDefRecipe * = nullptr; |
| 609 | for (auto *Wide : WideCanonicalIVs) { |
| 610 | for (VPUser *U : Wide->users()) { |
| 611 | auto *VPI = dyn_cast<VPInstruction>(Val: U); |
| 612 | if (!VPI || !vputils::isHeaderMask(V: VPI, Plan)) |
| 613 | continue; |
| 614 | |
| 615 | assert(VPI->getOperand(0) == Wide && |
| 616 | "WidenCanonicalIV must be the first operand of the compare" ); |
| 617 | assert(!HeaderMask && "Multiple header masks found?" ); |
| 618 | HeaderMask = VPI; |
| 619 | } |
| 620 | } |
| 621 | return HeaderMask; |
| 622 | } |
| 623 | |
| 624 | SmallVector<VPBasicBlock *> |
| 625 | VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, |
| 626 | VPBasicBlock *LastBB) { |
| 627 | assert(FirstBB->getParent() == LastBB->getParent() && |
| 628 | "FirstBB and LastBB from different regions" ); |
| 629 | #ifndef NDEBUG |
| 630 | bool InSingleSuccChain = false; |
| 631 | for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor()) |
| 632 | InSingleSuccChain |= (Succ == LastBB); |
| 633 | assert(InSingleSuccChain && |
| 634 | "LastBB unreachable from FirstBB in single-successor chain" ); |
| 635 | #endif |
| 636 | auto Blocks = to_vector( |
| 637 | Range: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: vp_depth_first_deep(G: FirstBB))); |
| 638 | auto *LastIt = find(Range&: Blocks, Val: LastBB); |
| 639 | assert(LastIt != Blocks.end() && |
| 640 | "LastBB unreachable from FirstBB in depth-first traversal" ); |
| 641 | Blocks.erase(CS: std::next(x: LastIt), CE: Blocks.end()); |
| 642 | return Blocks; |
| 643 | } |
| 644 | |
| 645 | bool VPBlockUtils::(const VPBlockBase *VPB, |
| 646 | const VPDominatorTree &VPDT) { |
| 647 | auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB); |
| 648 | if (!VPBB) |
| 649 | return false; |
| 650 | |
| 651 | // If VPBB is in a region R, VPBB is a loop header if R is a loop region with |
| 652 | // VPBB as its entry, i.e., free of predecessors. |
| 653 | if (auto *R = VPBB->getParent()) |
| 654 | return !R->isReplicator() && !VPBB->hasPredecessors(); |
| 655 | |
| 656 | // A header dominates its second predecessor (the latch), with the other |
| 657 | // predecessor being the preheader |
| 658 | return VPB->getPredecessors().size() == 2 && |
| 659 | VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]); |
| 660 | } |
| 661 | |
| 662 | bool VPBlockUtils::isLatch(const VPBlockBase *VPB, |
| 663 | const VPDominatorTree &VPDT) { |
| 664 | // A latch has a header as its second successor, with its other successor |
| 665 | // leaving the loop. A preheader OTOH has a header as its first (and only) |
| 666 | // successor. |
| 667 | return VPB->getNumSuccessors() == 2 && |
| 668 | VPBlockUtils::isHeader(VPB: VPB->getSuccessors()[1], VPDT); |
| 669 | } |
| 670 | |
| 671 | std::optional<MemoryLocation> |
| 672 | vputils::getMemoryLocation(const VPRecipeBase &R) { |
| 673 | auto *M = dyn_cast<VPIRMetadata>(Val: &R); |
| 674 | if (!M) |
| 675 | return std::nullopt; |
| 676 | MemoryLocation Loc; |
| 677 | // Populate noalias metadata from VPIRMetadata. |
| 678 | if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias)) |
| 679 | Loc.AATags.NoAlias = NoAliasMD; |
| 680 | if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope)) |
| 681 | Loc.AATags.Scope = AliasScopeMD; |
| 682 | return Loc; |
| 683 | } |
| 684 | |
| 685 | /// Find the ComputeReductionResult recipe for \p PhiR, looking through selects |
| 686 | /// inserted for predicated reductions or tail folding. |
| 687 | VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) { |
| 688 | VPValue *BackedgeVal = PhiR->getBackedgeValue(); |
| 689 | if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>( |
| 690 | V: BackedgeVal)) |
| 691 | return Res; |
| 692 | |
| 693 | // Look through selects inserted for tail folding or predicated reductions. |
| 694 | VPRecipeBase *SelR = vputils::findUserOf( |
| 695 | V: BackedgeVal, P: m_Select(Op0: m_VPValue(), Op1: m_VPValue(), Op2: m_VPValue())); |
| 696 | if (!SelR) |
| 697 | return nullptr; |
| 698 | return vputils::findUserOf<VPInstruction::ComputeReductionResult>( |
| 699 | V: cast<VPSingleDefRecipe>(Val: SelR)); |
| 700 | } |
| 701 | |