//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
using namespace llvm::SCEVPatternMatch;

bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}

bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}

bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}

VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    return Plan.getOrAddLiveIn(E->getValue());
  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
  // value. Otherwise the value may be defined in a loop and using it directly
  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
  // form.
  auto *U = dyn_cast<SCEVUnknown>(Expr);
  if (U && !isa<Instruction>(U->getValue()))
    return Plan.getOrAddLiveIn(U->getValue());
  auto *Expanded = new VPExpandSCEVRecipe(Expr);
  Plan.getEntry()->appendRecipe(Expanded);
  return Expanded;
}

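/// Returns true if \p V is a header mask of \p Plan, i.e. an active-lane-mask
/// phi, an active-lane-mask of the canonical IV (or its scalar steps) against
/// the trip count, or a comparison of a (widened) canonical IV against the
/// backedge-taken count.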
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  auto m_CanonicalScalarIVSteps =
      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()));

  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}

/// Returns true if \p R propagates poison from any operand to its result.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
          [](const VPRecipeBase *) { return true; })
      .Case([](const VPReplicateRecipe *Rep) {
        // GEP and casts propagate poison from all operands.
        unsigned Opcode = Rep->getOpcode();
        return Opcode == Instruction::GetElementPtr ||
               Instruction::isCast(Opcode);
      })
      .Default([](const VPRecipeBase *) { return false; });
}

/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *, 16> Worklist;

  Worklist.push_back(V);

  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }

      // Check if poison propagates through this recipe to any of its users.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }

  return false;
}

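/// Construct a SCEV expression for \p V by recursively translating its
/// defining recipes (binary operations, casts, min/max intrinsics, GEPs and
/// induction recipes) into SCEVs, using \p L for add recurrences and applying
/// the predicates of \p PSE to the result. Returns SCEVCouldNotCompute if no
/// SCEV can be constructed.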
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // Helper to create SCEVs for binary and unary operations.
  auto CreateSCEV =
      [&](ArrayRef<VPValue *> Ops,
          function_ref<const SCEV *(ArrayRef<const SCEV *>)> CreateFn)
      -> const SCEV * {
    SmallVector<const SCEV *, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V,
            m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as
  // x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction.
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V,
            m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
            if (!L)
              return SE.getCouldNotCompute();
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
                                    SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}

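/// Returns true if the address SCEV \p Addr is simple enough for cost
/// computation, i.e. it is loop-invariant with respect to \p L, an affine add
/// recurrence (possibly sign-extended), or a sum of such terms.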
bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
                                   const Loop *L) {
  // If the address is a SCEVAddExpr, require that all its operands are either
  // loop invariant or a (possibly sign-extended) affine AddRec.
  if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
    return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
      return SE.isLoopInvariant(Op, L) ||
             match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
             match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
    });
  }

  // Otherwise, check if the address is loop invariant or an affine add
  // recurrence.
  return SE.isLoopInvariant(Addr, L) ||
         match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
}

/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
/// uniform, the result will also be uniform.
static bool preservesUniformity(unsigned Opcode) {
  if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
    return true;
  switch (Opcode) {
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case VPInstruction::Not:
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    return true;
  default:
    return false;
  }
}

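/// Returns true if \p VPV is known to produce a single uniform scalar value:
/// live-ins, recipes that are single-scalar by construction, and
/// uniformity-preserving recipes whose operands are all single scalars.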
bool vputils::isSingleScalar(const VPValue *VPV) {
  // A live-in must be uniform across the scope of VPlan.
  if (isa<VPIRValue, VPSymbolicValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
          VPVectorEndPointerRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}

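/// Returns true if \p V produces the same value independent of the chosen
/// vectorization and unroll factors, e.g. live-ins, the canonical IV chain,
/// and uniformity-preserving operations over such values.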
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (isa<VPIRValue, VPSymbolicValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
  if (R && V->isDefinedOutsideLoopRegions()) {
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  auto *CanonicalIV =
      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
  // Canonical IV chain is uniform.
  if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
    return true;

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return (VPI->isScalarCast() &&
                isUniformAcrossVFsAndUFs(VPI->getOperand(0))) ||
               (preservesUniformity(VPI->getOpcode()) &&
                all_of(VPI->operands(), isUniformAcrossVFsAndUFs));
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform according to its operand.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) {
        // A value is considered non-uniform unless proven otherwise.
        return false;
      });
}

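/// Returns the first loop header block found in a shallow depth-first
/// traversal from the plan's entry, or nullptr if there is none.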
VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}

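/// Returns the VF scale factor of \p R if it is a reduction phi, reduction,
/// or expression recipe carrying one, and 1 otherwise.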
unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPReductionRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
    return ER->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

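/// Collects the recipes feeding the uncountable exit condition of \p Plan
/// into \p Recipes, and the address GEPs of the loads involved into \p GEPs.
/// Returns the uncountable exit condition on success, or std::nullopt if the
/// exit does not match the supported patterns.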
std::optional<VPValue *>
vputils::getRecipesForUncountableExit(VPlan &Plan,
                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
  // Given a VPlan like the following (just including the recipes contributing
  // to loop control exiting here, not the actual work), we're looking to match
  // the recipes contributing to the uncountable exit condition comparison
  // (here, vp<%4>) back to either live-ins or the address nodes for the load
  // used as part of the uncountable exit comparison so that we can copy them
  // to a preheader and rotate the address in the loop to the next vector
  // iteration.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF
  // Live-in ir<64> = original trip-count
  //
  // entry:
  // Successor(s): preheader, vector.ph
  //
  // vector.ph:
  // Successor(s): vector loop
  //
  // <x1> vector loop: {
  //   vector.body:
  //     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
  //     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
  //     CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
  //     WIDEN ir<%ee.load> = load ir<%ee.addr>
  //     WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
  //     EMIT vp<%5> = any-of vp<%4>
  //     EMIT vp<%6> = add vp<%2>, vp<%0>
  //     EMIT vp<%7> = icmp eq vp<%6>, ir<64>
  //     EMIT branch-on-two-conds vp<%5>, vp<%7>
  //   No successors
  // }
  // Successor(s): early.exit, middle.block
  //
  // middle.block:
  // Successor(s): preheader
  //
  // preheader:
  // No successors
  // }

  // Find the uncountable loop exit condition.
  auto *Region = Plan.getVectorLoopRegion();
  VPValue *UncountableCondition = nullptr;
  if (!match(Region->getExitingBasicBlock()->getTerminator(),
             m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
                                m_VPValue())))
    return std::nullopt;

  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(V->getDefiningRecipe());
    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
      // Reject masked loads for the time being; they make the exit condition
      // more complex.
      if (Load->isMasked())
        return std::nullopt;

      VPValue *GEP = Load->getAddr();
      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(Load);
      Recipes.push_back(GEP->getDefiningRecipe());
      GEPs.push_back(GEP->getDefiningRecipe());
    } else
      return std::nullopt;
  }

  return UncountableCondition;
}

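/// Returns true if \p VPB is a loop-header block: either the predecessor-less
/// entry of a non-replicating region, or a block with two predecessors
/// (preheader and latch) that dominates its latch.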
bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}

bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its second successor, with its other successor
  // leaving the loop. A preheader OTOH has a header as its first (and only)
  // successor.
  return VPB->getNumSuccessors() == 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}

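/// Returns a MemoryLocation for \p R populated only with the noalias and
/// alias.scope metadata attached to the recipe, or std::nullopt if \p R
/// carries no VPIRMetadata. No pointer or size information is set.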
std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
  auto *M = dyn_cast<VPIRMetadata>(&R);
  if (!M)
    return std::nullopt;
  MemoryLocation Loc;
  // Populate noalias metadata from VPIRMetadata.
  if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
    Loc.AATags.NoAlias = NoAliasMD;
  if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
    Loc.AATags.Scope = AliasScopeMD;
  return Loc;
}

/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}