1//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanAnalysis.h"
10#include "VPlan.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanHelpers.h"
14#include "VPlanPatternMatch.h"
15#include "llvm/ADT/PostOrderIterator.h"
16#include "llvm/ADT/TypeSwitch.h"
17#include "llvm/Analysis/ScalarEvolution.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/IR/PatternMatch.h"
21
22using namespace llvm;
23using namespace VPlanPatternMatch;
24
25#define DEBUG_TYPE "vplan"
26
27VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
28 if (auto LoopRegion = Plan.getVectorLoopRegion()) {
29 if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
30 Val: &LoopRegion->getEntryBasicBlock()->front())) {
31 CanonicalIVTy = CanIV->getScalarType();
32 return;
33 }
34 }
35
36 // If there's no canonical IV, retrieve the type from the trip count
37 // expression.
38 auto *TC = Plan.getTripCount();
39 if (auto *TCIRV = dyn_cast<VPIRValue>(Val: TC)) {
40 CanonicalIVTy = TCIRV->getType();
41 return;
42 }
43 CanonicalIVTy = cast<VPExpandSCEVRecipe>(Val: TC)->getSCEV()->getType();
44}
45
46Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
47 Type *ResTy = inferScalarType(V: R->getIncomingValue(Idx: 0));
48 for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
49 VPValue *Inc = R->getIncomingValue(Idx: I);
50 assert(inferScalarType(Inc) == ResTy &&
51 "different types inferred for different incoming values");
52 CachedTypes[Inc] = ResTy;
53 }
54 return ResTy;
55}
56
// Infer the scalar result type of VPInstruction \p R from its opcode and,
// where needed, from the types of its operands.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
    unsigned NumOperands = R->getNumOperandsWithoutMask();
    for (unsigned Op = 1; Op != NumOperands; ++Op) {
      VPValue *OtherV = R->getOperand(N: Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  // Generic binary/unary IR ops: the result type equals the operands' type.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  // Opcodes whose result has the same scalar type as their first operand.
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case Instruction::PHI:
  case VPInstruction::Broadcast:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExitingIVValue:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
  case VPInstruction::ReductionStartVector:
  case VPInstruction::ResumeForEpilogue:
  case VPInstruction::Reverse:
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::Select: {
    // Both select arms (operands 1 and 2) must agree on the type; cache it
    // for the second arm as well.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
    VPValue *OtherV = R->getOperand(N: 2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    // Comparisons and lane masks produce a boolean (i1).
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(V: R->getOperand(N: 1));
  case VPInstruction::ExplicitVectorLength:
    // The explicit vector length is modeled as a 32-bit integer.
    return Type::getIntNTy(C&: Ctx, N: 32);
  // Opcodes whose operands (minus any mask) all share the result type.
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::Unpack:
    return SetResultTyFromOp();
  case VPInstruction::ExtractLane:
    // Result type comes from operand 1 (presumably the extracted value, with
    // operand 0 being the lane index — confirm against the opcode docs).
    return inferScalarType(V: R->getOperand(N: 1));
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
    // Lane indices are modeled as 64-bit integers.
    return Type::getIntNTy(C&: Ctx, N: 64);
  case VPInstruction::LogicalAnd:
  case VPInstruction::LogicalOr:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd/Or operands should be bool");
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case VPInstruction::MaskedCond:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1));
    return IntegerType::get(C&: Ctx, NumBits: 1);
  // Branches and stores do not define a usable value.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::BranchOnCount:
  case Instruction::Store:
    return Type::getVoidTy(C&: Ctx);
  case Instruction::Load:
    return cast<LoadInst>(Val: R->getUnderlyingValue())->getType();
  case Instruction::Alloca:
    return cast<AllocaInst>(Val: R->getUnderlyingValue())->getType();
  case Instruction::Call: {
    // The callee is the last non-mask operand; its return type is the result.
    unsigned CallIdx = R->getNumOperandsWithoutMask() - 1;
    return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::GetElementPtr:
    // GEP results share the (pointer) type of their base operand.
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::ExtractValue:
    return cast<ExtractValueInst>(Val: R->getUnderlyingValue())->getType();
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}
163
164Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
165 unsigned Opcode = R->getOpcode();
166 if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
167 Instruction::isBitwiseLogicOp(Opcode)) {
168 Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
169 assert(ResTy == inferScalarType(R->getOperand(1)) &&
170 "types for both operands must match for binary op");
171 CachedTypes[R->getOperand(N: 1)] = ResTy;
172 return ResTy;
173 }
174
175 switch (Opcode) {
176 case Instruction::ICmp:
177 case Instruction::FCmp:
178 return IntegerType::get(C&: Ctx, NumBits: 1);
179 case Instruction::FNeg:
180 case Instruction::Freeze:
181 return inferScalarType(V: R->getOperand(N: 0));
182 case Instruction::ExtractValue: {
183 assert(R->getNumOperands() == 2 && "expected single level extractvalue");
184 auto *StructTy = cast<StructType>(Val: inferScalarType(V: R->getOperand(N: 0)));
185 return StructTy->getTypeAtIndex(
186 N: cast<VPConstantInt>(Val: R->getOperand(N: 1))->getZExtValue());
187 }
188 case Instruction::Select: {
189 Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
190 VPValue *OtherV = R->getOperand(N: 2);
191 assert(inferScalarType(OtherV) == ResTy &&
192 "different types inferred for different operands");
193 CachedTypes[OtherV] = ResTy;
194 return ResTy;
195 }
196 default:
197 break;
198 }
199
200 // Type inference not implemented for opcode.
201 LLVM_DEBUG({
202 dbgs() << "LV: Found unhandled opcode for: ";
203 R->getVPSingleValue()->dump();
204 });
205 llvm_unreachable("Unhandled opcode!");
206}
207
208Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
209 auto &CI = *cast<CallInst>(Val: R->getUnderlyingInstr());
210 return CI.getType();
211}
212
213Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
214 assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
215 "Store recipes should not define any values");
216 return cast<LoadInst>(Val: &R->getIngredient())->getType();
217}
218
// Infer the scalar result type of a replicated (per-lane) recipe \p R from
// the opcode of its underlying IR instruction.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    // Binary/shift/logic ops: both operands share the result type; cache the
    // type for the second operand too.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(N: 1)] = ResTy;
    return ResTy;
  }

  // Casts carry their destination type on the underlying instruction.
  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    // The callee is the last operand, skipping the mask when predicated.
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    // Both select arms must agree on the type; cache it for the second arm.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(N: 2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Comparisons produce a boolean (i1).
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    // These carry their result type on the underlying IR instruction.
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    // Result type matches the first operand (the base pointer for GEP).
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::Load:
    return cast<LoadInst>(Val: R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(C&: Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}
274
// Infer (and cache) the scalar type of \p V. Live-in IR values carry their
// own type; symbolic values default to the canonical IV type; everything else
// dispatches on the kind of the defining recipe.
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(Val: V))
    return CachedTy;

  if (auto *IRV = dyn_cast<VPIRValue>(Val: V))
    return IRV->getType();

  if (isa<VPSymbolicValue>(Val: V)) {
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPCurrentIterationPHIRecipe>(
              caseFn: [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due it being possibly truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(V: R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              caseFn: [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
              // These recipes produce the same scalar type as their first
              // operand.
              caseFn: [this](const VPRecipeBase *R) {
                return inferScalarType(V: R->getOperand(N: 0));
              })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              caseFn: [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe>(
              // Per-recipe-kind overloads above in this file.
              caseFn: [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case(caseFn: [V](const VPInterleaveBase *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case(caseFn: [](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          // NOTE(review): VPReductionRecipe is already matched by the grouped
          // Case above (operand 0), so this Case appears unreachable; if
          // getChainOp() is operand 0 the behavior is identical either way —
          // confirm and consider removing one of the two.
          .Case(caseFn: [this](const VPReductionRecipe *R) {
            return inferScalarType(V: R->getChainOp());
          })
          .Case(caseFn: [this](const VPExpressionRecipe *R) {
            return inferScalarType(V: R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}
333
334void llvm::collectEphemeralRecipesForVPlan(
335 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
336 // First, collect seed recipes which are operands of assumes.
337 SmallVector<VPRecipeBase *> Worklist;
338 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
339 Range: vp_depth_first_deep(G: Plan.getVectorLoopRegion()->getEntry()))) {
340 for (VPRecipeBase &R : *VPBB) {
341 auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R);
342 if (!RepR || !match(V: RepR, P: m_Intrinsic<Intrinsic::assume>()))
343 continue;
344 Worklist.push_back(Elt: RepR);
345 EphRecipes.insert(V: RepR);
346 }
347 }
348
349 // Process operands of candidates in worklist and add them to the set of
350 // ephemeral recipes, if they don't have side-effects and are only used by
351 // other ephemeral recipes.
352 while (!Worklist.empty()) {
353 VPRecipeBase *Cur = Worklist.pop_back_val();
354 for (VPValue *Op : Cur->operands()) {
355 auto *OpR = Op->getDefiningRecipe();
356 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(V: OpR))
357 continue;
358 if (any_of(Range: Op->users(), P: [EphRecipes](VPUser *U) {
359 auto *UR = dyn_cast<VPRecipeBase>(Val: U);
360 return !UR || !EphRecipes.contains(V: UR);
361 }))
362 continue;
363 EphRecipes.insert(V: OpR);
364 Worklist.push_back(Elt: OpR);
365 }
366 }
367}
368
// Explicitly instantiate the generic dominator-tree construction for VPlan's
// CFG (VPBlockBase, forward dominators) so the definition is emitted in this
// translation unit.
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);
371
// Return true if recipe \p A properly dominates recipe \p B. Recipes in the
// same block are compared by their position within the block; otherwise
// block-level dominance decides.
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  // A recipe never properly dominates itself.
  if (A == B)
    return false;

  // Returns true iff A appears before B in their shared parent block. Assumes
  // both recipes are in A's parent.
  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  // Debug-only check that neither recipe lives inside a replicate region;
  // the block-level dominance query below does not account for that nesting.
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    VPRegionBlock *Region = R->getRegion();
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  // Different blocks: defer to the block-level dominator tree.
  return Base::properlyDominates(A: ParentA, B: ParentB);
}
411
412bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI,
413 unsigned OverrideMaxNumRegs) const {
414 return any_of(Range: MaxLocalUsers, P: [&TTI, &OverrideMaxNumRegs](auto &LU) {
415 return LU.second > (OverrideMaxNumRegs > 0
416 ? OverrideMaxNumRegs
417 : TTI.getNumberOfRegisters(ClassID: LU.first));
418 });
419}
420
/// Estimate register usage for \p Plan at each candidate VF in \p VFs using a
/// linear-scan style live-interval analysis over the recipes of the vector
/// loop region: each defined VPValue opens an interval at its defining recipe
/// and closes it after its last in-loop use. Recipes whose underlying value
/// is in \p ValuesToIgnore are skipped. Loop-invariant values are accounted
/// separately since they do not grow with unrolling.
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // VPValue that is the key.
  using IntervalMap = SmallDenseMap<VPValue *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of VPValues that are used in the loop.
  SmallPtrSet<VPValue *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  if (Plan.getVectorTripCount().getNumUsers() > 0)
    LoopInvariants.insert(X: &Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to
  // each recipe. We use RPO to ensure that defs are met before their users. We
  // assume that each recipe that has in-loop users starts an interval. We
  // record every time that an in-loop value is used, so we have a list of the
  // first occurrences of each recipe and last occurrence of each VPValue.
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      LoopRegion);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Range: RPOT)) {
    // Stop at blocks without a parent region — presumably those are outside
    // the vector loop region (TODO confirm the traversal shape).
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(Elt: &R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        if (isa<VPRecipeValue>(Val: U)) {
          // Overwrite previous end points.
          EndPoint[U] = Idx2Recipe.size();
          Ends.insert(Ptr: U);
        } else if (auto *IRV = dyn_cast<VPIRValue>(Val: U)) {
          // Ignore non-recipe values such as arguments, constants, etc.
          // FIXME: Might need some motivation why these values are ignored. If
          // for example an argument is used inside the loop it will increase
          // the register pressure (so shouldn't we add it to LoopInvariants).
          if (!isa<Instruction>(Val: IRV->getValue()))
            continue;
          // This recipe is outside the loop, record it and continue.
          LoopInvariants.insert(X: U);
        }
        // Other types of VPValue are currently not tracked.
      }
    }
    if (VPBB == LoopRegion->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
        if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &R)) {
          EndPoint[WideIV] = Idx2Recipe.size();
          Ends.insert(Ptr: WideIV);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using VPValueList = SmallVector<VPValue *, 2>;
  SmallDenseMap<unsigned, VPValueList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Elt: Interval.first);

  SmallPtrSet<VPValue *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  const auto &TTICapture = TTI;
  // Number of registers needed for a vector of Ty with VF elements; 0 for
  // types that cannot legally be made into such a vector.
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(ElemTy: Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(Ty: VectorType::get(ElementType: Ty, EC: VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds then
  // we remove it from the set. The max register usage is the maximum register
  // usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the VPValues that end at this location.
    VPValueList &List = TransposeEnds[Idx];
    for (VPValue *ToRemove : List)
      OpenIntervals.erase(Ptr: ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (none_of(Range: R->definedValues(),
                P: [&Ends](VPValue *Def) { return Ends.count(Ptr: Def); }) &&
        !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(Val: R) &&
        ValuesToIgnore.contains(
            Ptr: cast<VPSingleDefRecipe>(Val: R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *VPV : OpenIntervals) {
        // Skip artificial values or values that weren't present in the original
        // loop.
        // TODO: Remove skipping values that weren't present in the original
        // loop after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(Val: VPV) ||
            match(V: VPV, P: m_ExtractLastPart(Op0: m_VPValue())))
          continue;

        // Values that stay scalar are counted as one scalar register;
        // everything else is costed as a vector of the (possibly scaled) VF.
        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPCurrentIterationPHIRecipe, VPScalarIVStepsRecipe>(Val: VPV) ||
            (isa<VPInstruction>(Val: VPV) && vputils::onlyScalarValuesUsed(Def: VPV)) ||
            (isa<VPReductionPHIRecipe>(Val: VPV) &&
             (cast<VPReductionPHIRecipe>(Val: VPV))->isInLoop())) {
          unsigned ClassID =
              TTI.getRegisterClassForType(Vector: false, Ty: TypeInfo.inferScalarType(V: VPV));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor =
              vputils::getVFScaleFactor(R: VPV->getDefiningRecipe());
          ElementCount VF = VFs[J];
          if (ScaleFactor > 1) {
            VF = VFs[J].divideCoefficientBy(RHS: ScaleFactor);
            LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
                              << " to " << VF << " for " << *R << "\n";);
          }

          Type *ScalarTy = TypeInfo.inferScalarType(V: VPV);
          unsigned ClassID = TTI.getRegisterClassForType(Vector: true, Ty: ScalarTy);
          RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
        }
      }

      // Fold this snapshot into the running per-class maximum for VF J.
      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(a: Entry, b: Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add used VPValues defined by the current recipe to the list of open
    // intervals.
    for (VPValue *DefV : R->definedValues())
      if (Ends.contains(Ptr: DefV))
        OpenIntervals.insert(Ptr: DefV);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      // A value only ever used as a scalar occupies a single scalar register.
      bool IsScalar = vputils::onlyScalarValuesUsed(Def: In);

      ElementCount VF = IsScalar ? ElementCount::getFixed(MinVal: 1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          Vector: VF.isVector(), Ty: TypeInfo.inferScalarType(V: In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(V: In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
652