VPlanAnalysis.cpp source code [llvm_projects/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp]

1	//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "VPlanAnalysis.h"
10	#include "VPlan.h"
11	#include "VPlanCFG.h"
12	#include "VPlanDominatorTree.h"
13	#include "VPlanHelpers.h"
14	#include "VPlanPatternMatch.h"
15	#include "llvm/ADT/PostOrderIterator.h"
16	#include "llvm/ADT/TypeSwitch.h"
17	#include "llvm/Analysis/ScalarEvolution.h"
18	#include "llvm/Analysis/TargetTransformInfo.h"
19	#include "llvm/IR/Instruction.h"
20	#include "llvm/IR/PatternMatch.h"
21
22	using namespace llvm;
23	using namespace VPlanPatternMatch;
24
25	#define DEBUG_TYPE "vplan"
26
27	VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
28	if (auto LoopRegion = Plan.getVectorLoopRegion()) {
29	if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
30	Val: &LoopRegion->getEntryBasicBlock()->front())) {
31	CanonicalIVTy = CanIV->getScalarType();
32	return;
33	}
34	}
35
36	// If there's no canonical IV, retrieve the type from the trip count
37	// expression.
38	auto *TC = Plan.getTripCount();
39	if (auto *TCIRV = dyn_cast<VPIRValue>(Val: TC)) {
40	CanonicalIVTy = TCIRV->getType();
41	return;
42	}
43	CanonicalIVTy = cast<VPExpandSCEVRecipe>(Val: TC)->getSCEV()->getType();
44	}
45
46	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPBlendRecipe *R) {
47	Type *ResTy = inferScalarType(V: R->getIncomingValue(Idx: `0`));
48	for (unsigned I = `1`, E = R->getNumIncomingValues(); I != E; ++I) {
49	VPValue *Inc = R->getIncomingValue(Idx: I);
50	assert(inferScalarType(Inc) == ResTy &&
51	"different types inferred for different incoming values");
52	CachedTypes [Inc] = ResTy;
53	}
54	return ResTy;
55	}
56
57	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPInstruction *R) {
58	// Set the result type from the first operand, check if the types for all
59	// other operands match and cache them.
60	auto SetResultTyFromOp = [this, R]() {
61	Type *ResTy = inferScalarType(V: R->getOperand(N: `0`));
62	unsigned NumOperands = R->getNumOperandsWithoutMask();
63	for (unsigned Op = `1`; Op != NumOperands; ++Op) {
64	VPValue *OtherV = R->getOperand(N: Op);
65	assert(inferScalarType(OtherV) == ResTy &&
66	"different types inferred for different operands");
67	CachedTypes [OtherV] = ResTy;
68	}
69	return ResTy;
70	};
71
72	unsigned Opcode = R->getOpcode();
73	if (Instruction::isBinaryOp(Opcode) \|\| Instruction::isUnaryOp(Opcode))
74	return SetResultTyFromOp ();
75
76	switch (Opcode) {
77	case Instruction::ExtractElement:
78	case Instruction::Freeze:
79	case Instruction::PHI:
80	case VPInstruction::Broadcast:
81	case VPInstruction::ComputeReductionResult:
82	case VPInstruction::ExitingIVValue:
83	case VPInstruction::ExtractLastLane:
84	case VPInstruction::ExtractPenultimateElement:
85	case VPInstruction::ExtractLastPart:
86	case VPInstruction::ExtractLastActive:
87	case VPInstruction::PtrAdd:
88	case VPInstruction::WidePtrAdd:
89	case VPInstruction::ReductionStartVector:
90	case VPInstruction::ResumeForEpilogue:
91	case VPInstruction::Reverse:
92	return inferScalarType(V: R->getOperand(N: `0`));
93	case Instruction::Select: {
94	Type *ResTy = inferScalarType(V: R->getOperand(N: `1`));
95	VPValue *OtherV = R->getOperand(N: `2`);
96	assert(inferScalarType(OtherV) == ResTy &&
97	"different types inferred for different operands");
98	CachedTypes [OtherV] = ResTy;
99	return ResTy;
100	}
101	case Instruction::ICmp:
102	case Instruction::FCmp:
103	case VPInstruction::ActiveLaneMask:
104	assert(inferScalarType(R->getOperand(`0`)) ==
105	inferScalarType(R->getOperand(`1`)) &&
106	"different types inferred for different operands");
107	return IntegerType::get(C&: Ctx, NumBits: `1`);
108	case VPInstruction::ComputeAnyOfResult:
109	return inferScalarType(V: R->getOperand(N: `1`));
110	case VPInstruction::ExplicitVectorLength:
111	return Type::getIntNTy(C&: Ctx, N: `32`);
112	case VPInstruction::FirstOrderRecurrenceSplice:
113	case VPInstruction::Not:
114	case VPInstruction::CalculateTripCountMinusVF:
115	case VPInstruction::CanonicalIVIncrementForPart:
116	case VPInstruction::AnyOf:
117	case VPInstruction::BuildStructVector:
118	case VPInstruction::BuildVector:
119	case VPInstruction::Unpack:
120	return SetResultTyFromOp ();
121	case VPInstruction::ExtractLane:
122	return inferScalarType(V: R->getOperand(N: `1`));
123	case VPInstruction::FirstActiveLane:
124	case VPInstruction::LastActiveLane:
125	return Type::getIntNTy(C&: Ctx, N: `64`);
126	case VPInstruction::LogicalAnd:
127	case VPInstruction::LogicalOr:
128	assert(inferScalarType(R->getOperand(`0`))->isIntegerTy(`1`) &&
129	inferScalarType(R->getOperand(`1`))->isIntegerTy(`1`) &&
130	"LogicalAnd/Or operands should be bool");
131	return IntegerType::get(C&: Ctx, NumBits: `1`);
132	case VPInstruction::MaskedCond:
133	assert(inferScalarType(R->getOperand(`0`))->isIntegerTy(`1`));
134	return IntegerType::get(C&: Ctx, NumBits: `1`);
135	case VPInstruction::BranchOnCond:
136	case VPInstruction::BranchOnTwoConds:
137	case VPInstruction::BranchOnCount:
138	case Instruction::Store:
139	return Type::getVoidTy(C&: Ctx);
140	case Instruction::Load:
141	return cast<LoadInst>(Val: R->getUnderlyingValue())->getType();
142	case Instruction::Alloca:
143	return cast<AllocaInst>(Val: R->getUnderlyingValue())->getType();
144	case Instruction::Call: {
145	unsigned CallIdx = R->getNumOperandsWithoutMask() - `1`;
146	return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
147	->getReturnType();
148	}
149	case Instruction::GetElementPtr:
150	return inferScalarType(V: R->getOperand(N: `0`));
151	case Instruction::ExtractValue:
152	return cast<ExtractValueInst>(Val: R->getUnderlyingValue())->getType();
153	default:
154	break;
155	}
156	// Type inference not implemented for opcode.
157	LLVM_DEBUG({
158	dbgs() << "LV: Found unhandled opcode for: ";
159	R->getVPSingleValue()->dump();
160	});
161	llvm_unreachable("Unhandled opcode!");
162	}
163
164	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPWidenRecipe *R) {
165	unsigned Opcode = R->getOpcode();
166	if (Instruction::isBinaryOp(Opcode) \|\| Instruction::isShift(Opcode) \|\|
167	Instruction::isBitwiseLogicOp(Opcode)) {
168	Type *ResTy = inferScalarType(V: R->getOperand(N: `0`));
169	assert(ResTy == inferScalarType(R->getOperand(`1`)) &&
170	"types for both operands must match for binary op");
171	CachedTypes [R->getOperand(N: `1`)] = ResTy;
172	return ResTy;
173	}
174
175	switch (Opcode) {
176	case Instruction::ICmp:
177	case Instruction::FCmp:
178	return IntegerType::get(C&: Ctx, NumBits: `1`);
179	case Instruction::FNeg:
180	case Instruction::Freeze:
181	return inferScalarType(V: R->getOperand(N: `0`));
182	case Instruction::ExtractValue: {
183	assert(R->getNumOperands() == `2` && "expected single level extractvalue");
184	auto *StructTy = cast<StructType>(Val: inferScalarType(V: R->getOperand(N: `0`)));
185	return StructTy->getTypeAtIndex(
186	N: cast<VPConstantInt>(Val: R->getOperand(N: `1`))->getZExtValue());
187	}
188	case Instruction::Select: {
189	Type *ResTy = inferScalarType(V: R->getOperand(N: `1`));
190	VPValue *OtherV = R->getOperand(N: `2`);
191	assert(inferScalarType(OtherV) == ResTy &&
192	"different types inferred for different operands");
193	CachedTypes [OtherV] = ResTy;
194	return ResTy;
195	}
196	default:
197	break;
198	}
199
200	// Type inference not implemented for opcode.
201	LLVM_DEBUG({
202	dbgs() << "LV: Found unhandled opcode for: ";
203	R->getVPSingleValue()->dump();
204	});
205	llvm_unreachable("Unhandled opcode!");
206	}
207
208	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPWidenCallRecipe *R) {
209	auto &CI = *cast<CallInst>(Val: R->getUnderlyingInstr());
210	return CI.getType();
211	}
212
213	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPWidenMemoryRecipe *R) {
214	assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
215	"Store recipes should not define any values");
216	return cast<LoadInst>(Val: &R->getIngredient())->getType();
217	}
218
219	Type VPTypeAnalysis::inferScalarTypeForRecipe(const* VPReplicateRecipe *R) {
220	unsigned Opcode = R->getUnderlyingInstr()->getOpcode();
221
222	if (Instruction::isBinaryOp(Opcode) \|\| Instruction::isShift(Opcode) \|\|
223	Instruction::isBitwiseLogicOp(Opcode)) {
224	Type *ResTy = inferScalarType(V: R->getOperand(N: `0`));
225	assert(ResTy == inferScalarType(R->getOperand(`1`)) &&
226	"inferred types for operands of binary op don't match");
227	CachedTypes [R->getOperand(N: `1`)] = ResTy;
228	return ResTy;
229	}
230
231	if (Instruction::isCast(Opcode))
232	return R->getUnderlyingInstr()->getType();
233
234	switch (Opcode) {
235	case Instruction::Call: {
236	unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? `2` : `1`);
237	return cast<Function>(Val: R->getOperand(N: CallIdx)->getLiveInIRValue())
238	->getReturnType();
239	}
240	case Instruction::Select: {
241	Type *ResTy = inferScalarType(V: R->getOperand(N: `1`));
242	assert(ResTy == inferScalarType(R->getOperand(`2`)) &&
243	"inferred types for operands of select op don't match");
244	CachedTypes [R->getOperand(N: `2`)] = ResTy;
245	return ResTy;
246	}
247	case Instruction::ICmp:
248	case Instruction::FCmp:
249	return IntegerType::get(C&: Ctx, NumBits: `1`);
250	case Instruction::Alloca:
251	case Instruction::ExtractValue:
252	return R->getUnderlyingInstr()->getType();
253	case Instruction::Freeze:
254	case Instruction::FNeg:
255	case Instruction::GetElementPtr:
256	return inferScalarType(V: R->getOperand(N: `0`));
257	case Instruction::Load:
258	return cast<LoadInst>(Val: R->getUnderlyingInstr())->getType();
259	case Instruction::Store:
260	// FIXME: VPReplicateRecipes with store opcodes still define a result
261	// VPValue, so we need to handle them here. Remove the code here once this
262	// is modeled accurately in VPlan.
263	return Type::getVoidTy(C&: Ctx);
264	default:
265	break;
266	}
267	// Type inference not implemented for opcode.
268	LLVM_DEBUG({
269	dbgs() << "LV: Found unhandled opcode for: ";
270	R->getVPSingleValue()->dump();
271	});
272	llvm_unreachable("Unhandled opcode");
273	}
274
275	Type VPTypeAnalysis::inferScalarType(const* VPValue *V) {
276	if (Type *CachedTy = CachedTypes.lookup(Val: V))
277	return CachedTy;
278
279	if (auto *IRV = dyn_cast<VPIRValue>(Val: V))
280	return IRV->getType();
281
282	if (isa<VPSymbolicValue>(Val: V)) {
283	// All VPValues without any underlying IR value (like the vector trip count
284	// or the backedge-taken count) have the same type as the canonical IV.
285	return CanonicalIVTy;
286	}
287
288	Type *ResultTy =
289	TypeSwitch<const VPRecipeBase , Type >(V->getDefiningRecipe())
290	.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
291	VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
292	VPWidenPointerInductionRecipe, VPCurrentIterationPHIRecipe>(
293	caseFn: [this](const auto *R) {
294	// Handle header phi recipes, except VPWidenIntOrFpInduction
295	// which needs special handling due it being possibly truncated.
296	// TODO: consider inferring/caching type of siblings, e.g.,
297	// backedge value, here and in cases below.
298	return inferScalarType(V: R->getStartValue());
299	})
300	.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
301	caseFn: [](const auto R) { return* R->getScalarType(); })
302	.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
303	VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
304	VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe>(
305	caseFn: [this](const VPRecipeBase *R) {
306	return inferScalarType(V: R->getOperand(N: `0`));
307	})
308	// VPInstructionWithType must be handled before VPInstruction.
309	.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
310	VPWidenCastRecipe>(
311	caseFn: [](const auto R) { return* R->getResultType(); })
312	.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
313	VPWidenCallRecipe, VPWidenMemoryRecipe>(
314	caseFn: [this](const auto R) { return* inferScalarTypeForRecipe(R); })
315	.Case(caseFn: [V](const VPInterleaveBase *R) {
316	// TODO: Use info from interleave group.
317	return V->getUnderlyingValue()->getType();
318	})
319	.Case(caseFn: [](const VPExpandSCEVRecipe *R) {
320	return R->getSCEV()->getType();
321	})
322	.Case(caseFn: [this](const VPReductionRecipe *R) {
323	return inferScalarType(V: R->getChainOp());
324	})
325	.Case(caseFn: [this](const VPExpressionRecipe *R) {
326	return inferScalarType(V: R->getOperandOfResultType());
327	});
328
329	assert(ResultTy && "could not infer type for the given VPValue");
330	CachedTypes [V] = ResultTy;
331	return ResultTy;
332	}
333
334	void llvm::collectEphemeralRecipesForVPlan(
335	VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
336	// First, collect seed recipes which are operands of assumes.
337	SmallVector<VPRecipeBase *> Worklist;
338	for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
339	Range: vp_depth_first_deep(G: Plan.getVectorLoopRegion()->getEntry()))) {
340	for (VPRecipeBase &R : *VPBB) {
341	auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R);
342	if (!RepR \|\| !match(V: RepR, P: m_Intrinsic<Intrinsic::assume>()))
343	continue;
344	Worklist.push_back(Elt: RepR);
345	EphRecipes.insert(V: RepR);
346	}
347	}
348
349	// Process operands of candidates in worklist and add them to the set of
350	// ephemeral recipes, if they don't have side-effects and are only used by
351	// other ephemeral recipes.
352	while (!Worklist.empty()) {
353	VPRecipeBase *Cur = Worklist.pop_back_val();
354	for (VPValue *Op : Cur->operands()) {
355	auto *OpR = Op->getDefiningRecipe();
356	if (!OpR \|\| OpR->mayHaveSideEffects() \|\| EphRecipes.contains(V: OpR))
357	continue;
358	if (any_of(Range: Op->users(), P: [EphRecipes](VPUser *U) {
359	auto *UR = dyn_cast<VPRecipeBase>(Val: U);
360	return !UR \|\| !EphRecipes.contains(V: UR);
361	}))
362	continue;
363	EphRecipes.insert(V: OpR);
364	Worklist.push_back(Elt: OpR);
365	}
366	}
367	}
368
369	template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
370	DominatorTreeBase<VPBlockBase, false> &DT);
371
372	bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
373	const VPRecipeBase *B) {
374	if (A == B)
375	return false;
376
377	auto LocalComesBefore = [](const VPRecipeBase A, const* VPRecipeBase *B) {
378	for (auto &R : *A->getParent()) {
379	if (&R == A)
380	return true;
381	if (&R == B)
382	return false;
383	}
384	llvm_unreachable("recipe not found");
385	};
386	const VPBlockBase *ParentA = A->getParent();
387	const VPBlockBase *ParentB = B->getParent();
388	if (ParentA == ParentB)
389	return LocalComesBefore (A, B);
390
391	#ifndef NDEBUG
392	auto GetReplicateRegion = [](VPRecipeBase R) -> VPRegionBlock {
393	VPRegionBlock *Region = R->getRegion();
394	if (Region && Region->isReplicator()) {
395	assert(Region->getNumSuccessors() == `1` &&
396	Region->getNumPredecessors() == `1` && "Expected SESE region!");
397	assert(R->getParent()->size() == `1` &&
398	"A recipe in an original replicator region must be the only "
399	"recipe in its block");
400	return Region;
401	}
402	return nullptr;
403	};
404	assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
405	"No replicate regions expected at this point");
406	assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
407	"No replicate regions expected at this point");
408	#endif
409	return Base::properlyDominates(A: ParentA, B: ParentB);
410	}
411
412	bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI,
413	unsigned OverrideMaxNumRegs) const {
414	return any_of(Range: MaxLocalUsers, P: [&TTI, &OverrideMaxNumRegs](auto &LU) {
415	return LU.second > (OverrideMaxNumRegs > `0`
416	? OverrideMaxNumRegs
417	: TTI.getNumberOfRegisters(ClassID: LU.first));
418	});
419	}
420
421	SmallVector<VPRegisterUsage, `8`> llvm::calculateRegisterUsageForPlan(
422	VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
423	const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
424	// Each 'key' in the map opens a new interval. The values
425	// of the map are the index of the 'last seen' usage of the
426	// VPValue that is the key.
427	using IntervalMap = SmallDenseMap<VPValue , unsigned*, `16`>;
428
429	// Maps indices to recipes.
430	SmallVector<VPRecipeBase *, `64`> Idx2Recipe;
431	// Marks the end of each interval.
432	IntervalMap EndPoint;
433	// Saves the list of VPValues that are used in the loop.
434	SmallPtrSet<VPValue *, `8`> Ends;
435	// Saves the list of values that are used in the loop but are defined outside
436	// the loop (not including non-recipe values such as arguments and
437	// constants).
438	SmallSetVector<VPValue *, `8`> LoopInvariants;
439	if (Plan.getVectorTripCount().getNumUsers() > `0`)
440	LoopInvariants.insert(X: &Plan.getVectorTripCount());
441
442	// We scan the loop in a topological order in order and assign a number to
443	// each recipe. We use RPO to ensure that defs are met before their users. We
444	// assume that each recipe that has in-loop users starts an interval. We
445	// record every time that an in-loop value is used, so we have a list of the
446	// first occurences of each recipe and last occurrence of each VPValue.
447	VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
448	ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
449	LoopRegion);
450	for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Range: RPOT)) {
451	if (!VPBB->getParent())
452	break;
453	for (VPRecipeBase &R : *VPBB) {
454	Idx2Recipe.push_back(Elt: &R);
455
456	// Save the end location of each USE.
457	for (VPValue *U : R.operands()) {
458	if (isa<VPRecipeValue>(Val: U)) {
459	// Overwrite previous end points.
460	EndPoint [U] = Idx2Recipe.size();
461	Ends.insert(Ptr: U);
462	} else if (auto *IRV = dyn_cast<VPIRValue>(Val: U)) {
463	// Ignore non-recipe values such as arguments, constants, etc.
464	// FIXME: Might need some motivation why these values are ignored. If
465	// for example an argument is used inside the loop it will increase
466	// the register pressure (so shouldn't we add it to LoopInvariants).
467	if (!isa<Instruction>(Val: IRV->getValue()))
468	continue;
469	// This recipe is outside the loop, record it and continue.
470	LoopInvariants.insert(X: U);
471	}
472	// Other types of VPValue are currently not tracked.
473	}
474	}
475	if (VPBB == LoopRegion->getExiting()) {
476	// VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
477	// exiting block, where their increment will get materialized eventually.
478	for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
479	if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &R)) {
480	EndPoint [WideIV] = Idx2Recipe.size();
481	Ends.insert(Ptr: WideIV);
482	}
483	}
484	}
485	}
486
487	// Saves the list of intervals that end with the index in 'key'.
488	using VPValueList = SmallVector<VPValue *, `2`>;
489	SmallDenseMap<unsigned, VPValueList, `16`> TransposeEnds;
490
491	// Next, we transpose the EndPoints into a multi map that holds the list of
492	// intervals that end* at a specific location.*
493	for (auto &Interval : EndPoint)
494	TransposeEnds [Interval.second].push_back(Elt: Interval.first);
495
496	SmallPtrSet<VPValue *, `8`> OpenIntervals;
497	SmallVector<VPRegisterUsage, `8`> RUs(VFs.size());
498	SmallVector<SmallMapVector<unsigned, unsigned, `4`>, `8`> MaxUsages(VFs.size());
499
500	LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
501
502	VPTypeAnalysis TypeInfo(Plan);
503
504	const auto &TTICapture = TTI;
505	auto GetRegUsage = [&TTICapture](Type Ty, ElementCount VF) -> unsigned* {
506	if (Ty->isTokenTy() \|\| !VectorType::isValidElementType(ElemTy: Ty) \|\|
507	(VF.isScalable() &&
508	!TTICapture.isElementTypeLegalForScalableVector(Ty)))
509	return `0`;
510	return TTICapture.getRegUsageForType(Ty: VectorType::get(ElementType: Ty, EC: VF));
511	};
512
513	// We scan the instructions linearly and record each time that a new interval
514	// starts, by placing it in a set. If we find this value in TransposEnds then
515	// we remove it from the set. The max register usage is the maximum register
516	// usage of the recipes of the set.
517	for (unsigned int Idx = `0`, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
518	VPRecipeBase *R = Idx2Recipe [Idx];
519
520	// Remove all of the VPValues that end at this location.
521	VPValueList &List = TransposeEnds [Idx];
522	for (VPValue *ToRemove : List)
523	OpenIntervals.erase(Ptr: ToRemove);
524
525	// Ignore recipes that are never used within the loop and do not have side
526	// effects.
527	if (none_of(Range: R->definedValues(),
528	P: [&Ends](VPValue Def) { return* Ends.count(Ptr: Def); }) &&
529	!R->mayHaveSideEffects())
530	continue;
531
532	// Skip recipes for ignored values.
533	// TODO: Should mark recipes for ephemeral values that cannot be removed
534	// explictly in VPlan.
535	if (isa<VPSingleDefRecipe>(Val: R) &&
536	ValuesToIgnore.contains(
537	Ptr: cast<VPSingleDefRecipe>(Val: R)->getUnderlyingValue()))
538	continue;
539
540	// For each VF find the maximum usage of registers.
541	for (unsigned J = `0`, E = VFs.size(); J < E; ++J) {
542	// Count the number of registers used, per register class, given all open
543	// intervals.
544	// Note that elements in this SmallMapVector will be default constructed
545	// as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
546	// there is no previous entry for ClassID.
547	SmallMapVector<unsigned, unsigned, `4`> RegUsage;
548
549	for (auto *VPV : OpenIntervals) {
550	// Skip artificial values or values that weren't present in the original
551	// loop.
552	// TODO: Remove skipping values that weren't present in the original
553	// loop after removing the legacy
554	// LoopVectorizationCostModel::calculateRegisterUsage
555	if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
556	VPBranchOnMaskRecipe>(Val: VPV) \|\|
557	match(V: VPV, P: m_ExtractLastPart(Op0: m_VPValue())))
558	continue;
559
560	if (VFs [J].isScalar() \|\|
561	isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
562	VPCurrentIterationPHIRecipe, VPScalarIVStepsRecipe>(Val: VPV) \|\|
563	(isa<VPInstruction>(Val: VPV) && vputils::onlyScalarValuesUsed(Def: VPV)) \|\|
564	(isa<VPReductionPHIRecipe>(Val: VPV) &&
565	(cast<VPReductionPHIRecipe>(Val: VPV))->isInLoop())) {
566	unsigned ClassID =
567	TTI.getRegisterClassForType(Vector: false, Ty: TypeInfo.inferScalarType(V: VPV));
568	// FIXME: The target might use more than one register for the type
569	// even in the scalar case.
570	RegUsage [ClassID] += `1`;
571	} else {
572	// The output from scaled phis and scaled reductions actually has
573	// fewer lanes than the VF.
574	unsigned ScaleFactor =
575	vputils::getVFScaleFactor(R: VPV->getDefiningRecipe());
576	ElementCount VF = VFs [J];
577	if (ScaleFactor > `1`) {
578	VF = VFs [J].divideCoefficientBy(RHS: ScaleFactor);
579	LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
580	<< " to " << VF << " for " << *R << "\n";);
581	}
582
583	Type *ScalarTy = TypeInfo.inferScalarType(V: VPV);
584	unsigned ClassID = TTI.getRegisterClassForType(Vector: true, Ty: ScalarTy);
585	RegUsage [ClassID] += GetRegUsage (ScalarTy, VF);
586	}
587	}
588
589	for (const auto &Pair : RegUsage) {
590	auto &Entry = MaxUsages [J][Pair.first];
591	Entry = std::max(a: Entry, b: Pair.second);
592	}
593	}
594
595	LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
596	<< OpenIntervals.size() << `'\n'`);
597
598	// Add used VPValues defined by the current recipe to the list of open
599	// intervals.
600	for (VPValue *DefV : R->definedValues())
601	if (Ends.contains(Ptr: DefV))
602	OpenIntervals.insert(Ptr: DefV);
603	}
604
605	// We also search for instructions that are defined outside the loop, but are
606	// used inside the loop. We need this number separately from the max-interval
607	// usage number because when we unroll, loop-invariant values do not take
608	// more register.
609	VPRegisterUsage RU;
610	for (unsigned Idx = `0`, End = VFs.size(); Idx < End; ++Idx) {
611	// Note that elements in this SmallMapVector will be default constructed
612	// as 0. So we can use "Invariant[ClassID] += n" in the code below even if
613	// there is no previous entry for ClassID.
614	SmallMapVector<unsigned, unsigned, `4`> Invariant;
615
616	for (auto *In : LoopInvariants) {
617	// FIXME: The target might use more than one register for the type
618	// even in the scalar case.
619	bool IsScalar = vputils::onlyScalarValuesUsed(Def: In);
620
621	ElementCount VF = IsScalar ? ElementCount::getFixed(MinVal: `1`) : VFs [Idx];
622	unsigned ClassID = TTI.getRegisterClassForType(
623	Vector: VF.isVector(), Ty: TypeInfo.inferScalarType(V: In));
624	Invariant [ClassID] += GetRegUsage (TypeInfo.inferScalarType(V: In), VF);
625	}
626
627	LLVM_DEBUG({
628	dbgs() << "LV(REG): VF = " << VFs[Idx] << `'\n'`;
629	dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
630	<< " item\n";
631	for (const auto &pair : MaxUsages[Idx]) {
632	dbgs() << "LV(REG): RegisterClass: "
633	<< TTI.getRegisterClassName(pair.first) << ", " << pair.second
634	<< " registers\n";
635	}
636	dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
637	<< " item\n";
638	for (const auto &pair : Invariant) {
639	dbgs() << "LV(REG): RegisterClass: "
640	<< TTI.getRegisterClassName(pair.first) << ", " << pair.second
641	<< " registers\n";
642	}
643	});
644
645	RU.LoopInvariantRegs = Invariant;
646	RU.MaxLocalUsers = MaxUsages [Idx];
647	RUs [Idx] = RU;
648	}
649
650	return RUs;
651	}
652

Browse the source code of llvm_projects/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp