LoopVectorizationPlanner.h source code [llvm_projects/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h]

1	//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file provides a LoopVectorizationPlanner class.
11	/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12	/// LoopVectorizationPlanner - drives the vectorization process after having
13	/// passed Legality checks.
14	/// The planner builds and optimizes the Vectorization Plans which record the
15	/// decisions how to vectorize the given loop. In particular, represent the
16	/// control-flow of the vectorized version, the replication of instructions that
17	/// are to be scalarized, and interleave access groups.
18	///
19	/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20	/// It provides an instruction-level API for generating VPInstructions while
21	/// abstracting away the Recipe manipulation details.
22	//===----------------------------------------------------------------------===//
23
24	#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25	#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27	#include "VPlan.h"
28	#include "llvm/ADT/SmallSet.h"
29	#include "llvm/Support/InstructionCost.h"
30
31	namespace {
32	class GeneratedRTChecks;
33	}
34
35	namespace llvm {
36
37	class LoopInfo;
38	class DominatorTree;
39	class LoopVectorizationLegality;
40	class LoopVectorizationCostModel;
41	class PredicatedScalarEvolution;
42	class LoopVectorizeHints;
43	class LoopVersioning;
44	class OptimizationRemarkEmitter;
45	class TargetTransformInfo;
46	class TargetLibraryInfo;
47	class VPRecipeBuilder;
48	struct VFRange;
49
50	extern cl::opt<bool> EnableVPlanNativePath;
51	extern cl::opt<unsigned> ForceTargetInstructionCost;
52
53	/// VPlan-based builder utility analogous to IRBuilder.
54	class VPBuilder {
55	VPBasicBlock BB = nullptr*;
56	VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator ();
57
58	/// Insert \p VPI in BB at InsertPt if BB is set.
59	template <typename T> T tryInsertInstruction(T R) {
60	if (BB)
61	BB->insert(Recipe: R, InsertPt);
62	return R;
63	}
64
65	VPInstruction createInstruction(unsigned* Opcode,
66	ArrayRef<VPValue *> Operands,
67	const VPIRMetadata &MD, DebugLoc DL,
68	const Twine &Name = "") {
69	return tryInsertInstruction(
70	R: new VPInstruction (Opcode, Operands, {}, MD, DL, Name));
71	}
72
73	public:
74	VPBuilder() = default;
75	VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
76	VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
77	VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
78	setInsertPoint(TheBB, IP);
79	}
80
81	/// Clear the insertion point: created instructions will not be inserted into
82	/// a block.
83	void clearInsertionPoint() {
84	BB = nullptr;
85	InsertPt = VPBasicBlock::iterator ();
86	}
87
88	VPBasicBlock getInsertBlock() const* { return BB; }
89	VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
90
91	/// Create a VPBuilder to insert after \p R.
92	static VPBuilder getToInsertAfter(VPRecipeBase *R) {
93	VPBuilder B;
94	B.setInsertPoint(TheBB: R->getParent(), IP: std::next(x: R->getIterator()));
95	return B;
96	}
97
98	/// InsertPoint - A saved insertion point.
99	class VPInsertPoint {
100	VPBasicBlock Block = nullptr*;
101	VPBasicBlock::iterator Point;
102
103	public:
104	/// Creates a new insertion point which doesn't point to anything.
105	VPInsertPoint() = default;
106
107	/// Creates a new insertion point at the given location.
108	VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
109	: Block(InsertBlock), Point (InsertPoint) {}
110
111	/// Returns true if this insert point is set.
112	bool isSet() const { return Block != nullptr; }
113
114	VPBasicBlock getBlock() const* { return Block; }
115	VPBasicBlock::iterator getPoint() const { return Point; }
116	};
117
118	/// Sets the current insert point to a previously-saved location.
119	void restoreIP(VPInsertPoint IP) {
120	if (IP.isSet())
121	setInsertPoint(TheBB: IP.getBlock(), IP: IP.getPoint());
122	else
123	clearInsertionPoint();
124	}
125
126	/// This specifies that created VPInstructions should be appended to the end
127	/// of the specified block.
128	void setInsertPoint(VPBasicBlock *TheBB) {
129	assert(TheBB && "Attempting to set a null insert point");
130	BB = TheBB;
131	InsertPt = BB->end();
132	}
133
134	/// This specifies that created instructions should be inserted at the
135	/// specified point.
136	void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
137	BB = TheBB;
138	InsertPt = IP;
139	}
140
141	/// This specifies that created instructions should be inserted at the
142	/// specified point.
143	void setInsertPoint(VPRecipeBase *IP) {
144	BB = IP->getParent();
145	InsertPt = IP->getIterator();
146	}
147
148	/// Insert \p R at the current insertion point.
149	void insert(VPRecipeBase *R) { BB->insert(Recipe: R, InsertPt); }
150
151	/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
152	/// its underlying Instruction.
153	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
154	Instruction Inst = nullptr*,
155	const VPIRFlags &Flags = {},
156	const VPIRMetadata &MD = {},
157	DebugLoc DL = DebugLoc::getUnknown(),
158	const Twine &Name = "") {
159	VPInstruction *NewVPInst = tryInsertInstruction(
160	R: new VPInstruction (Opcode, Operands, Flags, MD, DL, Name));
161	NewVPInst->setUnderlyingValue(Inst);
162	return NewVPInst;
163	}
164	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
165	DebugLoc DL, const Twine &Name = "") {
166	return createInstruction(Opcode, Operands, MD: {}, DL, Name);
167	}
168	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
169	const VPIRFlags &Flags,
170	DebugLoc DL = DebugLoc::getUnknown(),
171	const Twine &Name = "") {
172	return tryInsertInstruction(
173	R: new VPInstruction (Opcode, Operands, Flags, {}, DL, Name));
174	}
175
176	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
177	Type ResultTy, const* VPIRFlags &Flags = {},
178	DebugLoc DL = DebugLoc::getUnknown(),
179	const Twine &Name = "") {
180	return tryInsertInstruction(R: new VPInstructionWithType (
181	Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182	}
183
184	VPInstruction *createOverflowingOp(
185	unsigned Opcode, ArrayRef<VPValue *> Operands,
186	VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
187	DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
188	return tryInsertInstruction(
189	R: new VPInstruction (Opcode, Operands, WrapFlags, {}, DL, Name));
190	}
191
192	VPInstruction createNot(VPValue Operand,
193	DebugLoc DL = DebugLoc::getUnknown(),
194	const Twine &Name = "") {
195	return createInstruction(Opcode: VPInstruction::Not, Operands: {Operand}, MD: {}, DL, Name);
196	}
197
198	VPInstruction createAnd(VPValue LHS, VPValue *RHS,
199	DebugLoc DL = DebugLoc::getUnknown(),
200	const Twine &Name = "") {
201	return createInstruction(Opcode: Instruction::BinaryOps::And, Operands: {LHS, RHS}, MD: {}, DL,
202	Name);
203	}
204
205	VPInstruction createOr(VPValue LHS, VPValue *RHS,
206	DebugLoc DL = DebugLoc::getUnknown(),
207	const Twine &Name = "") {
208
209	return tryInsertInstruction(R: new VPInstruction (
210	Instruction::BinaryOps::Or, {LHS, RHS},
211	VPRecipeWithIRFlags::DisjointFlagsTy (false), {}, DL, Name));
212	}
213
214	VPInstruction *
215	createAdd(VPValue LHS, VPValue RHS, DebugLoc DL = DebugLoc::getUnknown(),
216	const Twine &Name = "",
217	VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
218	return createOverflowingOp(Opcode: Instruction::Add, Operands: {LHS, RHS}, WrapFlags, DL,
219	Name);
220	}
221
222	VPInstruction *
223	createSub(VPValue LHS, VPValue RHS, DebugLoc DL = DebugLoc::getUnknown(),
224	const Twine &Name = "",
225	VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
226	return createOverflowingOp(Opcode: Instruction::Sub, Operands: {LHS, RHS}, WrapFlags, DL,
227	Name);
228	}
229
230	VPInstruction createLogicalAnd(VPValue LHS, VPValue *RHS,
231	DebugLoc DL = DebugLoc::getUnknown(),
232	const Twine &Name = "") {
233	return createNaryOp(Opcode: VPInstruction::LogicalAnd, Operands: {LHS, RHS}, DL, Name);
234	}
235
236	VPInstruction createLogicalOr(VPValue LHS, VPValue *RHS,
237	DebugLoc DL = DebugLoc::getUnknown(),
238	const Twine &Name = "") {
239	return createNaryOp(Opcode: VPInstruction::LogicalOr, Operands: {LHS, RHS}, DL, Name);
240	}
241
242	VPInstruction createSelect(VPValue Cond, VPValue *TrueVal,
243	VPValue *FalseVal,
244	DebugLoc DL = DebugLoc::getUnknown(),
245	const Twine &Name = "",
246	const VPIRFlags &Flags = {}) {
247	return tryInsertInstruction(R: new VPInstruction (
248	Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
249	}
250
251	/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
252	/// and \p B.
253	VPInstruction createICmp(CmpInst::Predicate Pred, VPValue A, VPValue *B,
254	DebugLoc DL = DebugLoc::getUnknown(),
255	const Twine &Name = "") {
256	assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
257	Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
258	return tryInsertInstruction(
259	R: new VPInstruction (Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
260	}
261
262	/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
263	/// and \p B.
264	VPInstruction createFCmp(CmpInst::Predicate Pred, VPValue A, VPValue *B,
265	DebugLoc DL = DebugLoc::getUnknown(),
266	const Twine &Name = "") {
267	assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
268	Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
269	return tryInsertInstruction(
270	R: new VPInstruction (Instruction::FCmp, {A, B},
271	VPIRFlags (Pred, FastMathFlags ()), {}, DL, Name));
272	}
273
274	VPInstruction createPtrAdd(VPValue Ptr, VPValue *Offset,
275	DebugLoc DL = DebugLoc::getUnknown(),
276	const Twine &Name = "") {
277	return tryInsertInstruction(
278	R: new VPInstruction (VPInstruction::PtrAdd, {Ptr, Offset},
279	GEPNoWrapFlags::none(), {}, DL, Name));
280	}
281
282	VPInstruction createNoWrapPtrAdd(VPValue Ptr, VPValue *Offset,
283	GEPNoWrapFlags GEPFlags,
284	DebugLoc DL = DebugLoc::getUnknown(),
285	const Twine &Name = "") {
286	return tryInsertInstruction(R: new VPInstruction (
287	VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
288	}
289
290	VPInstruction createWidePtrAdd(VPValue Ptr, VPValue *Offset,
291	DebugLoc DL = DebugLoc::getUnknown(),
292	const Twine &Name = "") {
293	return tryInsertInstruction(
294	R: new VPInstruction (VPInstruction::WidePtrAdd, {Ptr, Offset},
295	GEPNoWrapFlags::none(), {}, DL, Name));
296	}
297
298	VPPhi createScalarPhi(ArrayRef<VPValue > IncomingValues,
299	DebugLoc DL = DebugLoc::getUnknown(),
300	const Twine &Name = "", const VPIRFlags &Flags = {}) {
301	return tryInsertInstruction(R: new VPPhi (IncomingValues, Flags, DL, Name));
302	}
303
304	VPValue createElementCount(Type Ty, ElementCount EC) {
305	VPlan &Plan = *getInsertBlock()->getPlan();
306	VPValue *RuntimeEC = Plan.getConstantInt(Ty, Val: EC.getKnownMinValue());
307	if (EC.isScalable()) {
308	VPValue *VScale = createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: Ty);
309	RuntimeEC = EC.getKnownMinValue() == `1`
310	? VScale
311	: createOverflowingOp(Opcode: Instruction::Mul,
312	Operands: {VScale, RuntimeEC}, WrapFlags: {true, false});
313	}
314	return RuntimeEC;
315	}
316
317	/// Convert the input value \p Current to the corresponding value of an
318	/// induction with \p Start and \p Step values, using \p Start + \p Current *
319	/// \p Step.
320	VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
321	FPMathOperator FPBinOp, VPIRValue Start,
322	VPValue Current, VPValue Step,
323	const Twine &Name = "") {
324	return tryInsertInstruction(
325	R: new VPDerivedIVRecipe (Kind, FPBinOp, Start, Current, Step, Name));
326	}
327
328	VPInstructionWithType createScalarLoad(Type ResultTy, VPValue *Addr,
329	DebugLoc DL,
330	const VPIRMetadata &Metadata = {}) {
331	return tryInsertInstruction(R: new VPInstructionWithType (
332	Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
333	}
334
335	VPInstruction createScalarCast(Instruction::CastOps Opcode, VPValue Op,
336	Type *ResultTy, DebugLoc DL,
337	const VPIRMetadata &Metadata = {}) {
338	return tryInsertInstruction(R: new VPInstructionWithType (
339	Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
340	DL));
341	}
342
343	VPInstruction createScalarCast(Instruction::CastOps Opcode, VPValue Op,
344	Type *ResultTy, DebugLoc DL,
345	const VPIRFlags &Flags,
346	const VPIRMetadata &Metadata = {}) {
347	return tryInsertInstruction(
348	R: new VPInstructionWithType (Opcode, Op, ResultTy, Flags, Metadata, DL));
349	}
350
351	VPValue createScalarZExtOrTrunc(VPValue Op, Type ResultTy, Type SrcTy,
352	DebugLoc DL) {
353	if (ResultTy == SrcTy)
354	return Op;
355	Instruction::CastOps CastOp =
356	ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
357	? Instruction::Trunc
358	: Instruction::ZExt;
359	return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
360	}
361
362	VPValue createScalarSExtOrTrunc(VPValue Op, Type ResultTy, Type SrcTy,
363	DebugLoc DL) {
364	if (ResultTy == SrcTy)
365	return Op;
366	Instruction::CastOps CastOp =
367	ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
368	? Instruction::Trunc
369	: Instruction::SExt;
370	return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
371	}
372
373	VPWidenCastRecipe createWidenCast(Instruction::CastOps Opcode, VPValue Op,
374	Type *ResultTy) {
375	return tryInsertInstruction(R: new VPWidenCastRecipe (
376	Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
377	}
378
379	VPScalarIVStepsRecipe *
380	createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
381	FPMathOperator FPBinOp, VPValue IV, VPValue *Step,
382	VPValue *VF, DebugLoc DL) {
383	return tryInsertInstruction(R: new VPScalarIVStepsRecipe (
384	IV, Step, VF, InductionOpcode,
385	FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags (), DL));
386	}
387
388	VPExpandSCEVRecipe createExpandSCEV(const* SCEV *Expr) {
389	return tryInsertInstruction(R: new VPExpandSCEVRecipe (Expr));
390	}
391
392	//===--------------------------------------------------------------------===//
393	// RAII helpers.
394	//===--------------------------------------------------------------------===//
395
396	/// RAII object that stores the current insertion point and restores it when
397	/// the object is destroyed.
398	class InsertPointGuard {
399	VPBuilder &Builder;
400	VPBasicBlock *Block;
401	VPBasicBlock::iterator Point;
402
403	public:
404	InsertPointGuard(VPBuilder &B)
405	: Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
406
407	InsertPointGuard(const InsertPointGuard &) = delete;
408	InsertPointGuard &operator=(const InsertPointGuard &) = delete;
409
410	~InsertPointGuard() { Builder.restoreIP(IP: VPInsertPoint (Block, Point)); }
411	};
412	};
413
414	/// TODO: The following VectorizationFactor was pulled out of
415	/// LoopVectorizationCostModel class. LV also deals with
416	/// VectorizerParams::VectorizationFactor.
417	/// We need to streamline them.
418
419	/// Information about vectorization costs.
420	struct VectorizationFactor {
421	/// Vector width with best cost.
422	ElementCount Width;
423
424	/// Cost of the loop with that width.
425	InstructionCost Cost;
426
427	/// Cost of the scalar loop.
428	InstructionCost ScalarCost;
429
430	/// The minimum trip count required to make vectorization profitable, e.g. due
431	/// to runtime checks.
432	ElementCount MinProfitableTripCount;
433
434	VectorizationFactor(ElementCount Width, InstructionCost Cost,
435	InstructionCost ScalarCost)
436	: Width (Width), Cost (Cost), ScalarCost (ScalarCost) {}
437
438	/// Width 1 means no vectorization, cost 0 means uncomputed cost.
439	static VectorizationFactor Disabled() {
440	return {ElementCount::getFixed(MinVal: `1`), `0`, `0`};
441	}
442
443	bool operator==(const VectorizationFactor &rhs) const {
444	return Width == rhs.Width && Cost == rhs.Cost;
445	}
446
447	bool operator!=(const VectorizationFactor &rhs) const {
448	return !(*this == rhs);
449	}
450	};
451
452	/// A class that represents two vectorization factors (initialized with 0 by
453	/// default). One for fixed-width vectorization and one for scalable
454	/// vectorization. This can be used by the vectorizer to choose from a range of
455	/// fixed and/or scalable VFs in order to find the most cost-effective VF to
456	/// vectorize with.
457	struct FixedScalableVFPair {
458	ElementCount FixedVF;
459	ElementCount ScalableVF;
460
461	FixedScalableVFPair()
462	: FixedVF(ElementCount::getFixed(MinVal: `0`)),
463	ScalableVF(ElementCount::getScalable(MinVal: `0`)) {}
464	FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair () {
465	*(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
466	}
467	FixedScalableVFPair(const ElementCount &FixedVF,
468	const ElementCount &ScalableVF)
469	: FixedVF (FixedVF), ScalableVF (ScalableVF) {
470	assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
471	"Invalid scalable properties");
472	}
473
474	static FixedScalableVFPair getNone() { return FixedScalableVFPair (); }
475
476	/// \return true if either fixed- or scalable VF is non-zero.
477	explicit operator bool() const { return FixedVF \|\| ScalableVF; }
478
479	/// \return true if either fixed- or scalable VF is a valid vector VF.
480	bool hasVector() const { return FixedVF.isVector() \|\| ScalableVF.isVector(); }
481	};
482
483	/// Planner drives the vectorization process after having passed
484	/// Legality checks.
485	class LoopVectorizationPlanner {
486	/// The loop that we evaluate.
487	Loop *OrigLoop;
488
489	/// Loop Info analysis.
490	LoopInfo *LI;
491
492	/// The dominator tree.
493	DominatorTree *DT;
494
495	/// Target Library Info.
496	const TargetLibraryInfo *TLI;
497
498	/// Target Transform Info.
499	const TargetTransformInfo &TTI;
500
501	/// The legality analysis.
502	LoopVectorizationLegality *Legal;
503
504	/// The profitability analysis.
505	LoopVectorizationCostModel &CM;
506
507	/// The interleaved access analysis.
508	InterleavedAccessInfo &IAI;
509
510	PredicatedScalarEvolution &PSE;
511
512	const LoopVectorizeHints &Hints;
513
514	OptimizationRemarkEmitter *ORE;
515
516	SmallVector<VPlanPtr, `4`> VPlans;
517
518	/// Profitable vector factors.
519	SmallVector<VectorizationFactor, `8`> ProfitableVFs;
520
521	/// A builder used to construct the current plan.
522	VPBuilder Builder;
523
524	/// Computes the cost of \p Plan for vectorization factor \p VF.
525	///
526	/// The current implementation requires access to the
527	/// LoopVectorizationLegality to handle inductions and reductions, which is
528	/// why it is kept separate from the VPlan-only cost infrastructure.
529	///
530	/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
531	/// been retired.
532	InstructionCost cost(VPlan &Plan, ElementCount VF) const;
533
534	/// Precompute costs for certain instructions using the legacy cost model. The
535	/// function is used to bring up the VPlan-based cost model to initially avoid
536	/// taking different decisions due to inaccuracies in the legacy cost model.
537	InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
538	VPCostContext &CostCtx) const;
539
540	public:
541	LoopVectorizationPlanner(
542	Loop L, LoopInfo LI, DominatorTree DT, const* TargetLibraryInfo *TLI,
543	const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
544	LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
545	PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
546	OptimizationRemarkEmitter *ORE)
547	: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
548	IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
549
550	/// Build VPlans for the specified \p UserVF and \p UserIC if they are
551	/// non-zero or all applicable candidate VFs otherwise. If vectorization and
552	/// interleaving should be avoided up-front, no plans are generated.
553	void plan(ElementCount UserVF, unsigned UserIC);
554
555	/// Use the VPlan-native path to plan how to best vectorize, return the best
556	/// VF and its cost.
557	VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
558
559	/// Return the VPlan for \p VF. At the moment, there is always a single VPlan
560	/// for each VF.
561	VPlan &getPlanFor(ElementCount VF) const;
562
563	/// Compute and return the most profitable vectorization factor. Also collect
564	/// all profitable VFs in ProfitableVFs.
565	VectorizationFactor computeBestVF();
566
567	/// \return The desired interleave count.
568	/// If interleave count has been specified by metadata it will be returned.
569	/// Otherwise, the interleave count is computed and returned. VF and LoopCost
570	/// are the selected vectorization factor and the cost of the selected VF.
571	unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
572	InstructionCost LoopCost);
573
574	/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
575	/// according to the best selected \p VF and \p UF.
576	///
577	/// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
578	/// epilogue vector loop. It should be removed once the re-use issue has been
579	/// fixed.
580	///
581	/// Returns a mapping of SCEVs to their expanded IR values.
582	/// Note that this is a temporary workaround needed due to the current
583	/// epilogue handling.
584	DenseMap<const SCEV , Value > executePlan(ElementCount VF, unsigned UF,
585	VPlan &BestPlan,
586	InnerLoopVectorizer &LB,
587	DominatorTree *DT,
588	bool VectorizingEpilogue);
589
590	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
591	void printPlans(raw_ostream &O);
592	#endif
593
594	/// Look through the existing plans and return true if we have one with
595	/// vectorization factor \p VF.
596	bool hasPlanWithVF(ElementCount VF) const {
597	return any_of(Range: VPlans,
598	P: [&](const VPlanPtr &Plan) { return Plan ->hasVF(VF); });
599	}
600
601	/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
602	/// \p Predicate on Range.Start, possibly decreasing Range.End such that the
603	/// returned value holds for the entire \p Range.
604	static bool
605	getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
606	VFRange &Range);
607
608	/// \return The most profitable vectorization factor and the cost of that VF
609	/// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
610	/// epilogue vectorization is not supported for the loop.
611	VectorizationFactor
612	selectEpilogueVectorizationFactor(const ElementCount MainLoopVF, unsigned IC);
613
614	/// Emit remarks for recipes with invalid costs in the available VPlans.
615	void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
616
617	/// Create a check to \p Plan to see if the vector loop should be executed
618	/// based on its trip count.
619	void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
620	ElementCount MinProfitableTripCount) const;
621
622	/// Update loop metadata and profile info for both the scalar remainder loop
623	/// and \p VectorLoop, if it exists. Keeps all loop hints from the original
624	/// loop on the vector loop and replaces vectorizer-specific metadata. The
625	/// loop ID of the original loop \p OrigLoopID must be passed, together with
626	/// the average trip count and invocation weight of the original loop (\p
627	/// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
628	/// cannot be retrieved after the plan has been executed, as the original loop
629	/// may have been removed.
630	void updateLoopMetadataAndProfileInfo(
631	Loop VectorLoop, VPBasicBlock HeaderVPBB, const VPlan &Plan,
632	bool VectorizingEpilogue, MDNode *OrigLoopID,
633	std::optional<unsigned> OrigAverageTripCount,
634	unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
635	bool DisableRuntimeUnroll);
636
637	protected:
638	/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
639	/// according to the information gathered by Legal when it checked if it is
640	/// legal to vectorize the loop.
641	void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
642
643	private:
644	/// Build a VPlan according to the information gathered by Legal. \return a
645	/// VPlan for vectorization factors \p Range.Start and up to \p Range.End
646	/// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
647	/// the input range, set the largest included VF to the maximum VF for which
648	/// no plan could be built.
649	VPlanPtr tryToBuildVPlan(VFRange &Range);
650
651	/// Build a VPlan using VPRecipes according to the information gather by
652	/// Legal. This method is only used for the legacy inner loop vectorizer.
653	/// \p Range's largest included VF is restricted to the maximum VF the
654	/// returned VPlan is valid for. If no VPlan can be built for the input range,
655	/// set the largest included VF to the maximum VF for which no plan could be
656	/// built. Each VPlan is built starting from a copy of \p InitialPlan, which
657	/// is a plain CFG VPlan wrapping the original scalar loop.
658	VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
659	LoopVersioning *LVer);
660
661	/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
662	/// according to the information gathered by Legal when it checked if it is
663	/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
664	void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
665
666	/// Add recipes to compute the final reduction result (ComputeAnyOfResult,
667	/// ComputeReductionResult depending on the reduction) in
668	/// the middle block. Selects are introduced for reductions between the phi
669	/// and users outside the vector region when folding the tail.
670	void addReductionResultComputation(VPlanPtr &Plan,
671	VPRecipeBuilder &RecipeBuilder,
672	ElementCount MinVF);
673
674	/// Attach the runtime checks of \p RTChecks to \p Plan.
675	void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
676	bool HasBranchWeights) const;
677
678	#ifndef NDEBUG
679	/// \return The most profitable vectorization factor for the available VPlans
680	/// and the cost of that VF.
681	/// This is now only used to verify the decisions by the new VPlan-based
682	/// cost-model and will be retired once the VPlan-based cost-model is
683	/// stabilized.
684	VectorizationFactor selectVectorizationFactor();
685	#endif
686
687	/// Returns true if the per-lane cost of VectorizationFactor A is lower than
688	/// that of B.
689	bool isMoreProfitable(const VectorizationFactor &A,
690	const VectorizationFactor &B, bool HasTail,
691	bool IsEpilogue = false) const;
692
693	/// Returns true if the per-lane cost of VectorizationFactor A is lower than
694	/// that of B in the context of vectorizing a loop with known \p MaxTripCount.
695	bool isMoreProfitable(const VectorizationFactor &A,
696	const VectorizationFactor &B,
697	const unsigned MaxTripCount, bool HasTail,
698	bool IsEpilogue = false) const;
699
700	/// Determines if we have the infrastructure to vectorize the loop and its
701	/// epilogue, assuming the main loop is vectorized by \p VF.
702	bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
703	};
704
705	} // namespace llvm
706
707	#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
708

Browse the source code of llvm_projects/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h