1//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanAnalysis.h"
10#include "VPlan.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanHelpers.h"
14#include "VPlanPatternMatch.h"
15#include "llvm/ADT/PostOrderIterator.h"
16#include "llvm/ADT/TypeSwitch.h"
17#include "llvm/Analysis/ScalarEvolution.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/IR/PatternMatch.h"
21
22using namespace llvm;
23using namespace VPlanPatternMatch;
24
25#define DEBUG_TYPE "vplan"
26
// Seed the analysis with the scalar type of the canonical IV. VPValues with
// no underlying IR value (see inferScalarType) are assumed to have this type.
VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getContext()), DL(Plan.getDataLayout()) {
  // Preferred source: the canonical IV phi at the start of the vector loop
  // region's entry block, if the plan has one.
  if (auto LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            Val: &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (auto *TCIRV = dyn_cast<VPIRValue>(Val: TC)) {
    CanonicalIVTy = TCIRV->getType();
    return;
  }
  // Otherwise the trip count must be an expanded SCEV; use the SCEV's type.
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(Val: TC)->getSCEV()->getType();
}
46
47Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
48 Type *ResTy = inferScalarType(V: R->getIncomingValue(Idx: 0));
49 for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
50 VPValue *Inc = R->getIncomingValue(Idx: I);
51 assert(inferScalarType(Inc) == ResTy &&
52 "different types inferred for different incoming values");
53 CachedTypes[Inc] = ResTy;
54 }
55 return ResTy;
56}
57
// Infer the scalar result type of a VPInstruction from its opcode and, where
// needed, from the inferred types of its operands.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
    // Skip a trailing mask operand, if present: its type (i1) need not match.
    unsigned NumOperands = R->getNumOperandsWithoutMask();
    for (unsigned Op = 1; Op != NumOperands; ++Op) {
      VPValue *OtherV = R->getOperand(N: Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  // All binary and unary IR opcodes produce the same type as their operands.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  // Opcodes whose result type matches their first operand's type.
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case Instruction::PHI:
  case VPInstruction::Broadcast:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExitingIVValue:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
  case VPInstruction::ReductionStartVector:
  case VPInstruction::ResumeForEpilogue:
  case VPInstruction::Reverse:
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::Select: {
    // Result type comes from the selected values (operands 1 and 2), which
    // must agree; operand 0 is the i1 condition.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
    VPValue *OtherV = R->getOperand(N: 2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  // Comparisons and lane masks yield a boolean.
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(V: R->getOperand(N: 1));
  case VPInstruction::ExplicitVectorLength:
    // EVL is always a 32-bit element count.
    return Type::getIntNTy(C&: Ctx, N: 32);
  // Opcodes where all (unmasked) operands share the result type.
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::Unpack:
    return SetResultTyFromOp();
  case VPInstruction::ExtractLane:
    // Operand 0 is the lane index; operand 1 supplies the extracted value.
    return inferScalarType(V: R->getOperand(N: 1));
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
    // Assume that the maximum possible number of elements in a vector fits
    // within the index type for the default address space.
    return DL.getIndexType(C&: Ctx, AddressSpace: 0);
  case VPInstruction::LogicalAnd:
  case VPInstruction::LogicalOr:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd/Or operands should be bool");
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case VPInstruction::MaskedCond:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1));
    return IntegerType::get(C&: Ctx, NumBits: 1);
  // Branches and stores define no value.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::BranchOnCount:
  case Instruction::Store:
    return Type::getVoidTy(C&: Ctx);
  // For memory/alloca opcodes, fall back to the underlying IR instruction.
  case Instruction::Load:
    return cast<LoadInst>(Val: R->getUnderlyingValue())->getType();
  case Instruction::Alloca:
    return cast<AllocaInst>(Val: R->getUnderlyingValue())->getType();
  case Instruction::Call: {
    // The callee is the last operand before any mask; use its return type.
    unsigned CallIdx = R->getNumOperandsWithoutMask() - 1;
    return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::GetElementPtr:
    // GEPs produce a pointer of the same type as their base pointer operand.
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::ExtractValue:
    return cast<ExtractValueInst>(Val: R->getUnderlyingValue())->getType();
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}
166
// Infer the scalar result type of a widened IR instruction from its opcode
// and operand types.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  // Binary arithmetic, shifts and bitwise logic all produce the common
  // operand type; cache the second operand's type while we are here.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(N: 1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Comparisons yield a boolean.
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::ExtractValue: {
    // Operand 0 is the struct, operand 1 the constant member index.
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(Val: inferScalarType(V: R->getOperand(N: 0)));
    return StructTy->getTypeAtIndex(
        N: cast<VPConstantInt>(Val: R->getOperand(N: 1))->getZExtValue());
  }
  case Instruction::Select: {
    // Result type comes from the selected values (operands 1 and 2), which
    // must agree; operand 0 is the i1 condition.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
    VPValue *OtherV = R->getOperand(N: 2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}
210
211Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
212 auto &CI = *cast<CallInst>(Val: R->getUnderlyingInstr());
213 return CI.getType();
214}
215
216Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
217 assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
218 "Store recipes should not define any values");
219 return cast<LoadInst>(Val: &R->getIngredient())->getType();
220}
221
// Infer the scalar result type of a replicated (per-lane scalarized) recipe
// from its underlying instruction's opcode.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  // Binary arithmetic, shifts and bitwise logic all produce the common
  // operand type; cache the second operand's type while we are here.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(V: R->getOperand(N: 0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(N: 1)] = ResTy;
    return ResTy;
  }

  // Casts carry their destination type on the underlying IR instruction.
  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    // The callee operand is last; a predicated replicate additionally carries
    // a trailing mask operand after it.
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    // Result type comes from the selected values (operands 1 and 2), which
    // must agree; operand 0 is the i1 condition.
    Type *ResTy = inferScalarType(V: R->getOperand(N: 1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(N: 2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Comparisons yield a boolean.
    return IntegerType::get(C&: Ctx, NumBits: 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    // Result type matches the first operand's type (for GEP: the pointer).
    return inferScalarType(V: R->getOperand(N: 0));
  case Instruction::Load:
    return cast<LoadInst>(Val: R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(C&: Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}
277
// Main entry point: infer (and cache) the scalar type of an arbitrary
// VPValue, dispatching on its defining recipe's class.
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(Val: V))
    return CachedTy;

  // Values backed by IR carry their type directly.
  if (auto *IRV = dyn_cast<VPIRValue>(Val: V))
    return IRV->getType();

  if (isa<VPSymbolicValue>(Val: V)) {
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPCurrentIterationPHIRecipe>(
              caseFn: [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due it being possibly truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(V: R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              caseFn: [](const auto *R) { return R->getScalarType(); })
          // NOTE(review): VPReductionRecipe is listed here AND in a later
          // Case below; TypeSwitch takes the first match, so the later
          // chain-op Case looks unreachable — confirm and remove one.
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
              caseFn: [this](const VPRecipeBase *R) {
                return inferScalarType(V: R->getOperand(N: 0));
              })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              caseFn: [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe>(
              caseFn: [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case(caseFn: [V](const VPInterleaveBase *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case(caseFn: [](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case(caseFn: [this](const VPReductionRecipe *R) {
            return inferScalarType(V: R->getChainOp());
          })
          .Case(caseFn: [this](const VPExpressionRecipe *R) {
            return inferScalarType(V: R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  // Memoize so repeated queries for the same VPValue are O(1).
  CachedTypes[V] = ResultTy;
  return ResultTy;
}
336
337void llvm::collectEphemeralRecipesForVPlan(
338 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
339 // First, collect seed recipes which are operands of assumes.
340 SmallVector<VPRecipeBase *> Worklist;
341 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
342 Range: vp_depth_first_deep(G: Plan.getVectorLoopRegion()->getEntry()))) {
343 for (VPRecipeBase &R : *VPBB) {
344 auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R);
345 if (!RepR || !match(V: RepR, P: m_Intrinsic<Intrinsic::assume>()))
346 continue;
347 Worklist.push_back(Elt: RepR);
348 EphRecipes.insert(V: RepR);
349 }
350 }
351
352 // Process operands of candidates in worklist and add them to the set of
353 // ephemeral recipes, if they don't have side-effects and are only used by
354 // other ephemeral recipes.
355 while (!Worklist.empty()) {
356 VPRecipeBase *Cur = Worklist.pop_back_val();
357 for (VPValue *Op : Cur->operands()) {
358 auto *OpR = Op->getDefiningRecipe();
359 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(V: OpR))
360 continue;
361 if (any_of(Range: Op->users(), P: [EphRecipes](VPUser *U) {
362 auto *UR = dyn_cast<VPRecipeBase>(Val: U);
363 return !UR || !EphRecipes.contains(V: UR);
364 }))
365 continue;
366 EphRecipes.insert(V: OpR);
367 Worklist.push_back(Elt: OpR);
368 }
369 }
370}
371
// Explicitly instantiate the generic dominator-tree construction for VPlan's
// CFG (VPBlockBase nodes, forward dominance), so VPDominatorTree users link
// against this translation unit.
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);
374
// Returns true if recipe \p A properly dominates recipe \p B: either both are
// in the same block and A comes strictly before B, or A's block properly
// dominates B's block in the VPlan CFG.
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  // "Properly" excludes a recipe dominating itself.
  if (A == B)
    return false;

  // Linear scan of the shared block: A comes before B iff we see A first.
  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  // The block-level dominance query below would be unsound if either recipe
  // still lived inside a replicate region, so assert those are gone.
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    VPRegionBlock *Region = R->getRegion();
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  // Different blocks: defer to block-level dominance.
  return Base::properlyDominates(A: ParentA, B: ParentB);
}
414
415InstructionCost VPRegisterUsage::spillCost(VPCostContext &Ctx,
416 unsigned OverrideMaxNumRegs) const {
417 InstructionCost Cost;
418 for (const auto &[RegClass, MaxUsers] : MaxLocalUsers) {
419 unsigned AvailableRegs = OverrideMaxNumRegs > 0
420 ? OverrideMaxNumRegs
421 : Ctx.TTI.getNumberOfRegisters(ClassID: RegClass);
422 if (MaxUsers > AvailableRegs) {
423 // Assume that for each register used past what's available we get one
424 // spill and reload.
425 unsigned Spills = MaxUsers - AvailableRegs;
426 InstructionCost SpillCost =
427 Ctx.TTI.getRegisterClassSpillCost(ClassID: RegClass, CostKind: Ctx.CostKind) +
428 Ctx.TTI.getRegisterClassReloadCost(ClassID: RegClass, CostKind: Ctx.CostKind);
429 InstructionCost TotalCost = Spills * SpillCost;
430 LLVM_DEBUG(dbgs() << "LV(REG): Cost of " << TotalCost << " from "
431 << Spills << " spills of "
432 << Ctx.TTI.getRegisterClassName(RegClass) << "\n");
433 Cost += TotalCost;
434 }
435 }
436 return Cost;
437}
438
// Estimate, for each candidate VF, the maximum number of registers live at
// any point of the vector loop, per register class, via a linear-scan style
// live-interval analysis over the recipes of \p Plan. Returns one
// VPRegisterUsage per entry in \p VFs. Recipes whose underlying values are in
// \p ValuesToIgnore are skipped.
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // VPValue that is the key.
  using IntervalMap = SmallDenseMap<VPValue *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of VPValues that are used in the loop.
  SmallPtrSet<VPValue *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  if (Plan.getVectorTripCount().getNumUsers() > 0)
    LoopInvariants.insert(X: &Plan.getVectorTripCount());

  // We scan the loop in a topological order in order and assign a number to
  // each recipe. We use RPO to ensure that defs are met before their users. We
  // assume that each recipe that has in-loop users starts an interval. We
  // record every time that an in-loop value is used, so we have a list of the
  // first occurences of each recipe and last occurrence of each VPValue.
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      LoopRegion);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Range: RPOT)) {
    // A block without a parent region is outside the loop; stop there.
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(Elt: &R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        if (isa<VPRecipeValue>(Val: U)) {
          // Overwrite previous end points.
          EndPoint[U] = Idx2Recipe.size();
          Ends.insert(Ptr: U);
        } else if (auto *IRV = dyn_cast<VPIRValue>(Val: U)) {
          // Ignore non-recipe values such as arguments, constants, etc.
          // FIXME: Might need some motivation why these values are ignored. If
          // for example an argument is used inside the loop it will increase
          // the register pressure (so shouldn't we add it to LoopInvariants).
          if (!isa<Instruction>(Val: IRV->getValue()))
            continue;
          // This recipe is outside the loop, record it and continue.
          LoopInvariants.insert(X: U);
        }
        // Other types of VPValue are currently not tracked.
      }
    }
    if (VPBB == LoopRegion->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
        if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &R)) {
          EndPoint[WideIV] = Idx2Recipe.size();
          Ends.insert(Ptr: WideIV);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using VPValueList = SmallVector<VPValue *, 2>;
  SmallDenseMap<unsigned, VPValueList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Elt: Interval.first);

  SmallPtrSet<VPValue *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  // Per-VF running maximum of simultaneous users per register class.
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan);

  // Number of registers a vector of type Ty occupies at the given VF; zero
  // for types that cannot be legally vectorized.
  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(ElemTy: Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(Ty: VectorType::get(ElementType: Ty, EC: VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposEnds then
  // we remove it from the set. The max register usage is the maximum register
  // usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the VPValues that end at this location.
    VPValueList &List = TransposeEnds[Idx];
    for (VPValue *ToRemove : List)
      OpenIntervals.erase(Ptr: ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (none_of(Range: R->definedValues(),
                P: [&Ends](VPValue *Def) { return Ends.count(Ptr: Def); }) &&
        !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explictly in VPlan.
    if (isa<VPSingleDefRecipe>(Val: R) &&
        ValuesToIgnore.contains(
            Ptr: cast<VPSingleDefRecipe>(Val: R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *VPV : OpenIntervals) {
        // Skip artificial values or values that weren't present in the original
        // loop.
        // TODO: Remove skipping values that weren't present in the original
        // loop after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(Val: VPV) ||
            match(V: VPV, P: m_ExtractLastPart(Op0: m_VPValue())))
          continue;

        // Values that stay scalar (scalar VF, inherently scalar recipes,
        // VPInstructions only used as scalars, in-loop reduction phis) take a
        // single scalar register.
        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPCurrentIterationPHIRecipe, VPScalarIVStepsRecipe>(Val: VPV) ||
            (isa<VPInstruction>(Val: VPV) && vputils::onlyScalarValuesUsed(Def: VPV)) ||
            (isa<VPReductionPHIRecipe>(Val: VPV) &&
             (cast<VPReductionPHIRecipe>(Val: VPV))->isInLoop())) {
          unsigned ClassID =
              TTI.getRegisterClassForType(Vector: false, Ty: TypeInfo.inferScalarType(V: VPV));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor =
              vputils::getVFScaleFactor(R: VPV->getDefiningRecipe());
          ElementCount VF = VFs[J];
          if (ScaleFactor > 1) {
            VF = VFs[J].divideCoefficientBy(RHS: ScaleFactor);
            LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
                              << " to " << VF << " for " << *R << "\n";);
          }

          Type *ScalarTy = TypeInfo.inferScalarType(V: VPV);
          unsigned ClassID = TTI.getRegisterClassForType(Vector: true, Ty: ScalarTy);
          RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
        }
      }

      // Fold this point's usage into the running per-class maximum.
      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(a: Entry, b: Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add used VPValues defined by the current recipe to the list of open
    // intervals.
    for (VPValue *DefV : R->definedValues())
      if (Ends.contains(Ptr: DefV))
        OpenIntervals.insert(Ptr: DefV);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more register.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = vputils::onlyScalarValuesUsed(Def: In);

      ElementCount VF = IsScalar ? ElementCount::getFixed(MinVal: 1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          Vector: VF.isVector(), Ty: TypeInfo.inferScalarType(V: In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(V: In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
670