//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace VPlanPatternMatch;

#define DEBUG_TYPE "vplan"

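// Determine the scalar type to use for VPValues without an inferable type,
// such as the canonical IV: prefer the scalar type of a canonical IV phi at
// the start of the vector loop region, and otherwise fall back to the type of
// the trip count expression (a plain IR value or an expanded SCEV).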
VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (auto *TCIRV = dyn_cast<VPIRValue>(TC)) {
    CanonicalIVTy = TCIRV->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

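// All incoming values of a blend must have the same scalar type; use the type
// of the first incoming value as the result type and cache it for the others.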
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

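// Infer the result type of a VPInstruction from its opcode: binary and unary
// ops use the common type of their operands, compares and lane masks produce
// i1, lane indices (First/LastActiveLane) are i64, the explicit vector length
// is i32, and branch opcodes define no value (void).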
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case Instruction::PHI:
  case VPInstruction::Broadcast:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
  case VPInstruction::ReductionStartVector:
  case VPInstruction::ResumeForEpilogue:
  case VPInstruction::Reverse:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::Unpack:
    return SetResultTyFromOp();
  case VPInstruction::ExtractLane:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

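// Widened binary ops, shifts, and bitwise logic ops use the common type of
// their two operands; compares produce i1, and extractvalue returns the
// indexed element type of its struct operand.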
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    return StructTy->getTypeAtIndex(
        cast<VPConstantInt>(R->getOperand(1))->getZExtValue());
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

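// Note on replicated calls: when the recipe is predicated, the mask is the
// last operand and follows the called function, so the callee is operand
// NumOperands - 2; otherwise it is the last operand (NumOperands - 1).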
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

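// Infer the scalar type of V, trying the cheap sources first: the cache, the
// type of an underlying IR value, and the canonical-IV type for symbolic
// values, before dispatching on the defining recipe. The inferred type is
// cached before returning.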
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (auto *IRV = dyn_cast<VPIRValue>(V))
    return IRV->getType();

  if (isa<VPSymbolicValue>(V)) {
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
              [this](const VPRecipeBase *R) {
                return inferScalarType(R->getOperand(0));
              })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case([V](const VPInterleaveBase *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case([this](const VPReductionRecipe *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case([this](const VPExpressionRecipe *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}

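// Ephemeral recipes only contribute to assumptions and can be ignored, e.g.,
// when computing costs. As an illustrative IR-level sketch (the analysis
// itself works on the corresponding VPlan recipes):
//
//   %c = icmp ult i64 %i, %n
//   call void @llvm.assume(i1 %c)
//
// The icmp feeds only the assume, so both are ephemeral.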
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR, m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      // Capture the set by reference to avoid copying it per operand.
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

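// Explicit instantiation of the dominator tree computation for the VPlan CFG
// (forward dominance over VPBlockBase), used by VPDominatorTree below.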
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    VPRegionBlock *Region = R->getRegion();
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI,
                                        unsigned OverrideMaxNumRegs) const {
  return any_of(MaxLocalUsers, [&TTI, &OverrideMaxNumRegs](auto &LU) {
    return LU.second > (OverrideMaxNumRegs > 0
                            ? OverrideMaxNumRegs
                            : TTI.getNumberOfRegisters(LU.first));
  });
}

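// The register usage estimate below is a form of live-interval analysis:
// recipes are numbered in reverse post-order, each VPValue's interval spans
// from its definition to its last recorded use, and the maximum number of
// simultaneously open intervals per register class approximates register
// pressure for each candidate VF. Illustrative sketch: if recipe r2 uses the
// results of both r0 and r1, the intervals of r0 and r1 are still open when
// r2 is visited, so two registers of their class are live at that point.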
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // VPValue that is the key.
  using IntervalMap = SmallDenseMap<VPValue *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of VPValues that are used in the loop.
  SmallPtrSet<VPValue *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are encountered before their users. We
  // assume that each recipe that has in-loop users starts an interval. We
  // record every time that an in-loop value is used, so we have a list of the
  // first occurrence of each recipe and the last occurrence of each VPValue.
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      LoopRegion);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        auto *IRV = dyn_cast<VPIRValue>(U);
        if (!DefR && (!IRV || !isa<Instruction>(IRV->getValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[U] = Idx2Recipe.size();
        Ends.insert(U);
      }
    }
    if (VPBB == LoopRegion->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
        if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[WideIV] = Idx2Recipe.size();
          Ends.insert(WideIV);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using VPValueList = SmallVector<VPValue *, 2>;
  SmallDenseMap<unsigned, VPValueList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPValue *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the VPValues that end at this location.
    VPValueList &List = TransposeEnds[Idx];
    for (VPValue *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (none_of(R->definedValues(),
                [&Ends](VPValue *Def) { return Ends.count(Def); }) &&
        !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *VPV : OpenIntervals) {
        // Skip artificial values or values that weren't present in the
        // original loop.
        // TODO: Remove skipping values that weren't present in the original
        // loop after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage.
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(VPV) ||
            match(VPV, m_ExtractLastPart(m_VPValue())))
          continue;

        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(VPV) ||
            (isa<VPInstruction>(VPV) && vputils::onlyScalarValuesUsed(VPV)) ||
            (isa<VPReductionPHIRecipe>(VPV) &&
             (cast<VPReductionPHIRecipe>(VPV))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(VPV));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor =
              vputils::getVFScaleFactor(VPV->getDefiningRecipe());
          ElementCount VF = VFs[J];
          if (ScaleFactor > 1) {
            VF = VFs[J].divideCoefficientBy(ScaleFactor);
            LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
                              << " to " << VF << " for " << *R << "\n");
          }

          Type *ScalarTy = TypeInfo.inferScalarType(VPV);
          unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
          RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add used VPValues defined by the current recipe to the list of open
    // intervals.
    for (VPValue *DefV : R->definedValues())
      if (Ends.contains(DefV))
        OpenIntervals.insert(DefV);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = vputils::onlyScalarValuesUsed(In);

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
635