//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

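// A blend merges per-lane values from its incoming operands, so all incoming
// values must share a single scalar type; infer it from the first incoming
// value and cache it for the rest.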
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
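  // The explicit vector length is modeled as a 32-bit integer, matching the
  // i32 EVL operand of LLVM's vector-predication (VP) intrinsics.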
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of the first operand only, as other operands of header
    // phis may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on the first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
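    // The callee is the last operand, unless the recipe is predicated, in
    // which case the mask is last and the callee is second-to-last.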
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}

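// Collect all recipes in \p Plan that only compute values feeding
// llvm.assume calls. Starting from the assumes themselves, operands are
// walked transitively, adding any side-effect-free recipe whose users are
// all ephemeral.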
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

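  // Returns true if A appears before B in their (shared) parent basic block,
  // determined by a linear scan of the block's recipes.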
  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
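/// For example, a partial reduction that accumulates 4 input lanes into 1 has
/// a scale factor of 4, so its output only has VF/4 lanes.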
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
  return any_of(MaxLocalUsers, [&TTI](auto &LU) {
    return LU.second > TTI.getNumberOfRegisters(LU.first);
  });
}

SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the set of recipes that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
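  // For example, a recipe first defined at index 3 and last used at index 7
  // occupies a register for the whole interval between those two points.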
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());

  const auto &TTICapture = TTI;
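  // Returns the number of registers needed to hold a vector of Ty with VF
  // lanes; returns 0 for types that cannot legally be placed in a vector
  // register.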
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

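        // A recipe occupies a single scalar register when the VF itself is
        // scalar, when it produces scalar values (e.g. the canonical IV,
        // replicated recipes, scalar IV steps), when all its users only
        // consume its scalar values, or when it is an in-loop reduction phi;
        // otherwise it occupies vector registers.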
        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because, when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
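      // An invariant only needs a scalar register if every user consumes its
      // scalar value; otherwise it is kept (broadcast) in a vector register.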
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
646 | |