//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

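// Determine the type of the canonical IV: use the type of the canonical IV
// recipe if the plan has a vector loop region with one; otherwise fall back
// to the type of the trip count expression.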
VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
  if (auto LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

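// All incoming values of a blend must have the same scalar type, so the type
// is inferred from the first incoming value and cached for the others.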
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

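// Infer the result type of a VPInstruction from its opcode: most opcodes
// propagate an operand's type, comparisons and masks produce i1, and branches
// produce void.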
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand; check that the types of all
  // other operands match, and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of the first operand only, as other operands of header
    // phis may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on the first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

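// Type inference for widened instructions mirrors the underlying scalar
// opcode: binary, shift and bitwise ops require matching operand types,
// compares produce i1, and extractvalue returns the selected struct element
// type.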
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

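// A widened call produces the scalar return type of the underlying call.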
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

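// Only widened loads define a value; their scalar type is the type of the
// loaded IR value.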
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

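// For widened selects, both selected values must agree on the scalar type;
// the type of the second operand is used and cached for the third.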
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

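// Replicate recipes execute the underlying scalar instruction once per lane;
// the result type follows the IR rules for that opcode, including a void
// placeholder for stores (see the FIXME below).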
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

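// Main entry point of the analysis: return the cached type for V if
// available, handle live-ins directly, and otherwise dispatch on the defining
// recipe. Inferred types are cached before returning.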
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}

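// A recipe is ephemeral if it is only used (transitively) to compute operands
// of assume intrinsics. The walk below starts from the assume operands and
// grows the set backwards, stopping at recipes with side effects or with
// non-ephemeral users.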
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

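// Explicit template instantiation of the generic dominator tree construction
// for the VPlan CFG (used by VPDominatorTree).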
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

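// For recipes in the same basic block, proper dominance is decided by a
// linear scan over the block; otherwise it reduces to dominance of the parent
// blocks.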
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

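// Register pressure exceeds the budget if, for any register class, more
// values are simultaneously live than the target has registers in that class.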
bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
  return any_of(MaxLocalUsers, [&TTI](auto &LU) {
    return LU.second > TTI.getNumberOfRegisters(LU.first);
  });
}

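// Estimate register pressure for each candidate VF using a linear-scan style
// live-interval analysis over the recipes of the vector loop: a recipe opens
// an interval where it is defined (in RPO) and closes it at its last use.
// For example, for r2 = add r0, r1 followed by r3 = mul r2, r2, the intervals
// of r0 and r1 end where r2 is defined, so at most two values are live at any
// point. The maximum number of simultaneously open intervals, per target
// register class, yields the per-VF register usage.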
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of recipe indices that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage.
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because, when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
| 646 | |