LoopVectorizationPlanner.h source code [llvm_projects/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h]

1	//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file provides a LoopVectorizationPlanner class.
11	/// InnerLoopVectorizer vectorizes loops which contain only one basic
12	/// LoopVectorizationPlanner - drives the vectorization process after having
13	/// passed Legality checks.
14	/// The planner builds and optimizes the Vectorization Plans which record the
15	/// decisions how to vectorize the given loop. In particular, represent the
16	/// control-flow of the vectorized version, the replication of instructions that
17	/// are to be scalarized, and interleave access groups.
18	///
19	/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20	/// It provides an instruction-level API for generating VPInstructions while
21	/// abstracting away the Recipe manipulation details.
22	//===----------------------------------------------------------------------===//
23
24	#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25	#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27	#include "VPlan.h"
28	#include "llvm/ADT/SmallSet.h"
29	#include "llvm/Support/InstructionCost.h"
30
31	namespace llvm {
32
33	class LoopInfo;
34	class DominatorTree;
35	class LoopVectorizationLegality;
36	class LoopVectorizationCostModel;
37	class PredicatedScalarEvolution;
38	class LoopVectorizeHints;
39	class OptimizationRemarkEmitter;
40	class TargetTransformInfo;
41	class TargetLibraryInfo;
42	class VPRecipeBuilder;
43
44	/// VPlan-based builder utility analogous to IRBuilder.
45	class VPBuilder {
46	VPBasicBlock BB = nullptr*;
47	VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator ();
48
49	/// Insert \p VPI in BB at InsertPt if BB is set.
50	VPInstruction tryInsertInstruction(VPInstruction VPI) {
51	if (BB)
52	BB->insert(Recipe: VPI, InsertPt);
53	return VPI;
54	}
55
56	VPInstruction createInstruction(unsigned* Opcode,
57	ArrayRef<VPValue *> Operands, DebugLoc DL,
58	const Twine &Name = "") {
59	return tryInsertInstruction(VPI: new VPInstruction (Opcode, Operands, DL, Name));
60	}
61
62	VPInstruction createInstruction(unsigned* Opcode,
63	std::initializer_list<VPValue *> Operands,
64	DebugLoc DL, const Twine &Name = "") {
65	return createInstruction(Opcode, Operands: ArrayRef<VPValue *>(Operands), DL, Name);
66	}
67
68	public:
69	VPBuilder() = default;
70	VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71	VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
72
73	/// Clear the insertion point: created instructions will not be inserted into
74	/// a block.
75	void clearInsertionPoint() {
76	BB = nullptr;
77	InsertPt = VPBasicBlock::iterator ();
78	}
79
80	VPBasicBlock getInsertBlock() const* { return BB; }
81	VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
82
83	/// Create a VPBuilder to insert after \p R.
84	static VPBuilder getToInsertAfter(VPRecipeBase *R) {
85	VPBuilder B;
86	B.setInsertPoint(TheBB: R->getParent(), IP: std::next(x: R->getIterator()));
87	return B;
88	}
89
90	/// InsertPoint - A saved insertion point.
91	class VPInsertPoint {
92	VPBasicBlock Block = nullptr*;
93	VPBasicBlock::iterator Point;
94
95	public:
96	/// Creates a new insertion point which doesn't point to anything.
97	VPInsertPoint() = default;
98
99	/// Creates a new insertion point at the given location.
100	VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
101	: Block(InsertBlock), Point (InsertPoint) {}
102
103	/// Returns true if this insert point is set.
104	bool isSet() const { return Block != nullptr; }
105
106	VPBasicBlock getBlock() const* { return Block; }
107	VPBasicBlock::iterator getPoint() const { return Point; }
108	};
109
110	/// Sets the current insert point to a previously-saved location.
111	void restoreIP(VPInsertPoint IP) {
112	if (IP.isSet())
113	setInsertPoint(TheBB: IP.getBlock(), IP: IP.getPoint());
114	else
115	clearInsertionPoint();
116	}
117
118	/// This specifies that created VPInstructions should be appended to the end
119	/// of the specified block.
120	void setInsertPoint(VPBasicBlock *TheBB) {
121	assert(TheBB && "Attempting to set a null insert point");
122	BB = TheBB;
123	InsertPt = BB->end();
124	}
125
126	/// This specifies that created instructions should be inserted at the
127	/// specified point.
128	void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
129	BB = TheBB;
130	InsertPt = IP;
131	}
132
133	/// This specifies that created instructions should be inserted at the
134	/// specified point.
135	void setInsertPoint(VPRecipeBase *IP) {
136	BB = IP->getParent();
137	InsertPt = IP->getIterator();
138	}
139
140	/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
141	/// its underlying Instruction.
142	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
143	Instruction Inst = nullptr*,
144	const Twine &Name = "") {
145	DebugLoc DL;
146	if (Inst)
147	DL = Inst->getDebugLoc();
148	VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
149	NewVPInst->setUnderlyingValue(Inst);
150	return NewVPInst;
151	}
152	VPInstruction createNaryOp(unsigned* Opcode, ArrayRef<VPValue *> Operands,
153	DebugLoc DL, const Twine &Name = "") {
154	return createInstruction(Opcode, Operands, DL, Name);
155	}
156
157	VPInstruction createOverflowingOp(unsigned* Opcode,
158	std::initializer_list<VPValue *> Operands,
159	VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
160	DebugLoc DL = {}, const Twine &Name = "") {
161	return tryInsertInstruction(
162	VPI: new VPInstruction (Opcode, Operands, WrapFlags, DL, Name));
163	}
164	VPValue createNot(VPValue Operand, DebugLoc DL = {},
165	const Twine &Name = "") {
166	return createInstruction(Opcode: VPInstruction::Not, Operands: {Operand}, DL, Name);
167	}
168
169	VPValue createAnd(VPValue LHS, VPValue *RHS, DebugLoc DL = {},
170	const Twine &Name = "") {
171	return createInstruction(Opcode: Instruction::BinaryOps::And, Operands: {LHS, RHS}, DL, Name);
172	}
173
174	VPValue createOr(VPValue LHS, VPValue *RHS, DebugLoc DL = {},
175	const Twine &Name = "") {
176
177	return tryInsertInstruction(VPI: new VPInstruction (
178	Instruction::BinaryOps::Or, {LHS, RHS},
179	VPRecipeWithIRFlags::DisjointFlagsTy (false), DL, Name));
180	}
181
182	VPValue createLogicalAnd(VPValue LHS, VPValue *RHS, DebugLoc DL = {},
183	const Twine &Name = "") {
184	return tryInsertInstruction(
185	VPI: new VPInstruction (VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
186	}
187
188	VPValue createSelect(VPValue Cond, VPValue TrueVal, VPValue FalseVal,
189	DebugLoc DL = {}, const Twine &Name = "",
190	std::optional<FastMathFlags> FMFs = std::nullopt) {
191	auto *Select =
192	FMFs ? new VPInstruction (Instruction::Select, {Cond, TrueVal, FalseVal},
193	*FMFs, DL, Name)
194	: new VPInstruction (Instruction::Select, {Cond, TrueVal, FalseVal},
195	DL, Name);
196	return tryInsertInstruction(VPI: Select);
197	}
198
199	/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
200	/// and \p B.
201	/// TODO: add createFCmp when needed.
202	VPValue createICmp(CmpInst::Predicate Pred, VPValue A, VPValue *B,
203	DebugLoc DL = {}, const Twine &Name = "");
204
205	//===--------------------------------------------------------------------===//
206	// RAII helpers.
207	//===--------------------------------------------------------------------===//
208
209	/// RAII object that stores the current insertion point and restores it when
210	/// the object is destroyed.
211	class InsertPointGuard {
212	VPBuilder &Builder;
213	VPBasicBlock *Block;
214	VPBasicBlock::iterator Point;
215
216	public:
217	InsertPointGuard(VPBuilder &B)
218	: Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
219
220	InsertPointGuard(const InsertPointGuard &) = delete;
221	InsertPointGuard &operator=(const InsertPointGuard &) = delete;
222
223	~InsertPointGuard() { Builder.restoreIP(IP: VPInsertPoint (Block, Point)); }
224	};
225	};
226
227	/// TODO: The following VectorizationFactor was pulled out of
228	/// LoopVectorizationCostModel class. LV also deals with
229	/// VectorizerParams::VectorizationFactor.
230	/// We need to streamline them.
231
232	/// Information about vectorization costs.
233	struct VectorizationFactor {
234	/// Vector width with best cost.
235	ElementCount Width;
236
237	/// Cost of the loop with that width.
238	InstructionCost Cost;
239
240	/// Cost of the scalar loop.
241	InstructionCost ScalarCost;
242
243	/// The minimum trip count required to make vectorization profitable, e.g. due
244	/// to runtime checks.
245	ElementCount MinProfitableTripCount;
246
247	VectorizationFactor(ElementCount Width, InstructionCost Cost,
248	InstructionCost ScalarCost)
249	: Width (Width), Cost (Cost), ScalarCost (ScalarCost) {}
250
251	/// Width 1 means no vectorization, cost 0 means uncomputed cost.
252	static VectorizationFactor Disabled() {
253	return {ElementCount::getFixed(MinVal: `1`), `0`, `0`};
254	}
255
256	bool operator==(const VectorizationFactor &rhs) const {
257	return Width == rhs.Width && Cost == rhs.Cost;
258	}
259
260	bool operator!=(const VectorizationFactor &rhs) const {
261	return !(*this == rhs);
262	}
263	};
264
265	/// A class that represents two vectorization factors (initialized with 0 by
266	/// default). One for fixed-width vectorization and one for scalable
267	/// vectorization. This can be used by the vectorizer to choose from a range of
268	/// fixed and/or scalable VFs in order to find the most cost-effective VF to
269	/// vectorize with.
270	struct FixedScalableVFPair {
271	ElementCount FixedVF;
272	ElementCount ScalableVF;
273
274	FixedScalableVFPair()
275	: FixedVF(ElementCount::getFixed(MinVal: `0`)),
276	ScalableVF(ElementCount::getScalable(MinVal: `0`)) {}
277	FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair () {
278	*(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
279	}
280	FixedScalableVFPair(const ElementCount &FixedVF,
281	const ElementCount &ScalableVF)
282	: FixedVF (FixedVF), ScalableVF (ScalableVF) {
283	assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
284	"Invalid scalable properties");
285	}
286
287	static FixedScalableVFPair getNone() { return FixedScalableVFPair (); }
288
289	/// \return true if either fixed- or scalable VF is non-zero.
290	explicit operator bool() const { return FixedVF \|\| ScalableVF; }
291
292	/// \return true if either fixed- or scalable VF is a valid vector VF.
293	bool hasVector() const { return FixedVF.isVector() \|\| ScalableVF.isVector(); }
294	};
295
296	/// Planner drives the vectorization process after having passed
297	/// Legality checks.
298	class LoopVectorizationPlanner {
299	/// The loop that we evaluate.
300	Loop *OrigLoop;
301
302	/// Loop Info analysis.
303	LoopInfo *LI;
304
305	/// The dominator tree.
306	DominatorTree *DT;
307
308	/// Target Library Info.
309	const TargetLibraryInfo *TLI;
310
311	/// Target Transform Info.
312	const TargetTransformInfo &TTI;
313
314	/// The legality analysis.
315	LoopVectorizationLegality *Legal;
316
317	/// The profitability analysis.
318	LoopVectorizationCostModel &CM;
319
320	/// The interleaved access analysis.
321	InterleavedAccessInfo &IAI;
322
323	PredicatedScalarEvolution &PSE;
324
325	const LoopVectorizeHints &Hints;
326
327	OptimizationRemarkEmitter *ORE;
328
329	SmallVector<VPlanPtr, `4`> VPlans;
330
331	/// Profitable vector factors.
332	SmallVector<VectorizationFactor, `8`> ProfitableVFs;
333
334	/// A builder used to construct the current plan.
335	VPBuilder Builder;
336
337	/// Computes the cost of \p Plan for vectorization factor \p VF.
338	///
339	/// The current implementation requires access to the
340	/// LoopVectorizationLegality to handle inductions and reductions, which is
341	/// why it is kept separate from the VPlan-only cost infrastructure.
342	///
343	/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
344	/// been retired.
345	InstructionCost cost(VPlan &Plan, ElementCount VF) const;
346
347	public:
348	LoopVectorizationPlanner(
349	Loop L, LoopInfo LI, DominatorTree DT, const* TargetLibraryInfo *TLI,
350	const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
351	LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
352	PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
353	OptimizationRemarkEmitter *ORE)
354	: OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
355	IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
356
357	/// Plan how to best vectorize, return the best VF and its cost, or
358	/// std::nullopt if vectorization and interleaving should be avoided up front.
359	std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
360
361	/// Use the VPlan-native path to plan how to best vectorize, return the best
362	/// VF and its cost.
363	VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
364
365	/// Return the best VPlan for \p VF.
366	VPlan &getBestPlanFor(ElementCount VF) const;
367
368	/// Return the most profitable plan and fix its VF to the most profitable one.
369	VPlan &getBestPlan() const;
370
371	/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
372	/// according to the best selected \p VF and \p UF.
373	///
374	/// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
375	/// vectorization re-using plans for both the main and epilogue vector loops.
376	/// It should be removed once the re-use issue has been fixed.
377	/// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
378	/// to re-use expansion results generated during main plan execution.
379	///
380	/// Returns a mapping of SCEVs to their expanded IR values and a mapping for
381	/// the reduction resume values. Note that this is a temporary workaround
382	/// needed due to the current epilogue handling.
383	std::pair<DenseMap<const SCEV , Value >,
384	DenseMap<const RecurrenceDescriptor , Value >>
385	executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
386	InnerLoopVectorizer &LB, DominatorTree *DT,
387	bool IsEpilogueVectorization,
388	const DenseMap<const SCEV , Value > ExpandedSCEVs = nullptr*);
389
390	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
391	void printPlans(raw_ostream &O);
392	#endif
393
394	/// Look through the existing plans and return true if we have one with
395	/// vectorization factor \p VF.
396	bool hasPlanWithVF(ElementCount VF) const {
397	return any_of(Range: VPlans,
398	P: [&](const VPlanPtr &Plan) { return Plan ->hasVF(VF); });
399	}
400
401	/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
402	/// \p Predicate on Range.Start, possibly decreasing Range.End such that the
403	/// returned value holds for the entire \p Range.
404	static bool
405	getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
406	VFRange &Range);
407
408	/// \return The most profitable vectorization factor and the cost of that VF
409	/// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
410	/// epilogue vectorization is not supported for the loop.
411	VectorizationFactor
412	selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
413
414	protected:
415	/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
416	/// according to the information gathered by Legal when it checked if it is
417	/// legal to vectorize the loop.
418	void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
419
420	private:
421	/// Build a VPlan according to the information gathered by Legal. \return a
422	/// VPlan for vectorization factors \p Range.Start and up to \p Range.End
423	/// exclusive, possibly decreasing \p Range.End.
424	VPlanPtr buildVPlan(VFRange &Range);
425
426	/// Build a VPlan using VPRecipes according to the information gather by
427	/// Legal. This method is only used for the legacy inner loop vectorizer.
428	/// \p Range's largest included VF is restricted to the maximum VF the
429	/// returned VPlan is valid for. If no VPlan can be built for the input range,
430	/// set the largest included VF to the maximum VF for which no plan could be
431	/// built.
432	VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
433
434	/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
435	/// according to the information gathered by Legal when it checked if it is
436	/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
437	void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
438
439	// Adjust the recipes for reductions. For in-loop reductions the chain of
440	// instructions leading from the loop exit instr to the phi need to be
441	// converted to reductions, with one operand being vector and the other being
442	// the scalar reduction chain. For other reductions, a select is introduced
443	// between the phi and live-out recipes when folding the tail.
444	void adjustRecipesForReductions(VPlanPtr &Plan,
445	VPRecipeBuilder &RecipeBuilder,
446	ElementCount MinVF);
447
448	/// \return The most profitable vectorization factor for the available VPlans
449	/// and the cost of that VF.
450	/// This is now only used to verify the decisions by the new VPlan-based
451	/// cost-model and will be retired once the VPlan-based cost-model is
452	/// stabilized.
453	VectorizationFactor selectVectorizationFactor();
454
455	/// Returns true if the per-lane cost of VectorizationFactor A is lower than
456	/// that of B.
457	bool isMoreProfitable(const VectorizationFactor &A,
458	const VectorizationFactor &B) const;
459
460	/// Determines if we have the infrastructure to vectorize the loop and its
461	/// epilogue, assuming the main loop is vectorized by \p VF.
462	bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
463	};
464
465	} // namespace llvm
466
467	#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
468

Browse the source code of llvm_projects/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h