//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

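// Note: each inferScalarTypeForRecipe overload below computes the scalar
// result type for one concrete recipe class. inferScalarType dispatches to
// them and memoizes results in CachedTypes, so each VPValue's type is
// inferred at most once.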
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult:
    return inferScalarType(R->getOperand(0));
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of the first operand only, as other operands of header
    // phis may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on the first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPPredInstPHIRecipe, VPWidenPHIRecipe, VPScalarIVStepsRecipe,
                VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}
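
// Illustrative usage sketch: clients typically construct one VPTypeAnalysis
// per plan and query it repeatedly, relying on the memoization above, e.g.
//   VPTypeAnalysis TypeInfo(Plan);
//   Type *Ty = TypeInfo.inferScalarType(Def);
// where Def stands for any VPValue in the plan.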
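// Collects recipes that only compute values feeding llvm.assume calls,
// analogous to CodeMetrics::collectEphemeralValues on IR: such recipes define
// nothing that outlives the assumption, so they can be ignored when, e.g.,
// estimating cost or register pressure.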
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in the worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes. Capture the set by reference to avoid copying it
  // for every operand.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

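// Returns true if recipe A properly dominates recipe B. Recipes in the same
// VPBasicBlock are compared by their position in the block; recipes in
// different blocks defer to block-level dominance of their parents.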
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region =
        dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
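/// For example (illustrative), a partial reduction accumulating i8 lanes into
/// i32 results may report a scale factor of 4, meaning its output has VF/4
/// lanes for an input VF.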
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
  return any_of(MaxLocalUsers, [&TTI](auto &LU) {
    return LU.second > TTI.getNumberOfRegisters(LU.first);
  });
}

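// Estimates register usage for each candidate VF using a linear-scan style
// analysis: recipes are numbered in reverse post-order, every def opens a live
// interval that closes at its last use, and the maximum number of
// simultaneously open intervals approximates the per-register-class pressure.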
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of recipe indices that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };
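
  // Illustrative: on a target with 128-bit vector registers, GetRegUsage
  // would typically return 1 for (i32, VF=4) and 2 for (i64, VF=4), while
  // element types that are invalid for vectors contribute 0.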

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because, when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}