1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/Support/InstructionCost.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
39class LoopVectorizationLegality;
40class LoopVectorizationCostModel;
41class PredicatedScalarEvolution;
42class LoopVectorizeHints;
43class LoopVersioning;
44class OptimizationRemarkEmitter;
45class TargetTransformInfo;
46class TargetLibraryInfo;
47class VPRecipeBuilder;
48struct VPRegisterUsage;
49struct VFRange;
50
51extern cl::opt<bool> EnableVPlanNativePath;
52extern cl::opt<unsigned> ForceTargetInstructionCost;
53
54/// VPlan-based builder utility analogous to IRBuilder.
55class VPBuilder {
56 VPBasicBlock *BB = nullptr;
57 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
58
59 /// Insert \p VPI in BB at InsertPt if BB is set.
60 template <typename T> T *tryInsertInstruction(T *R) {
61 if (BB)
62 BB->insert(Recipe: R, InsertPt);
63 return R;
64 }
65
66 VPInstruction *createInstruction(unsigned Opcode,
67 ArrayRef<VPValue *> Operands,
68 const VPIRMetadata &MD, DebugLoc DL,
69 const Twine &Name = "") {
70 return tryInsertInstruction(
71 R: new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
72 }
73
74public:
75 VPBuilder() = default;
76 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
77 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
78 VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
79 setInsertPoint(TheBB, IP);
80 }
81
82 /// Clear the insertion point: created instructions will not be inserted into
83 /// a block.
84 void clearInsertionPoint() {
85 BB = nullptr;
86 InsertPt = VPBasicBlock::iterator();
87 }
88
89 VPBasicBlock *getInsertBlock() const { return BB; }
90 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
91
92 /// Create a VPBuilder to insert after \p R.
93 static VPBuilder getToInsertAfter(VPRecipeBase *R) {
94 VPBuilder B;
95 B.setInsertPoint(TheBB: R->getParent(), IP: std::next(x: R->getIterator()));
96 return B;
97 }
98
99 /// InsertPoint - A saved insertion point.
100 class VPInsertPoint {
101 VPBasicBlock *Block = nullptr;
102 VPBasicBlock::iterator Point;
103
104 public:
105 /// Creates a new insertion point which doesn't point to anything.
106 VPInsertPoint() = default;
107
108 /// Creates a new insertion point at the given location.
109 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
110 : Block(InsertBlock), Point(InsertPoint) {}
111
112 /// Returns true if this insert point is set.
113 bool isSet() const { return Block != nullptr; }
114
115 VPBasicBlock *getBlock() const { return Block; }
116 VPBasicBlock::iterator getPoint() const { return Point; }
117 };
118
119 /// Sets the current insert point to a previously-saved location.
120 void restoreIP(VPInsertPoint IP) {
121 if (IP.isSet())
122 setInsertPoint(TheBB: IP.getBlock(), IP: IP.getPoint());
123 else
124 clearInsertionPoint();
125 }
126
127 /// This specifies that created VPInstructions should be appended to the end
128 /// of the specified block.
129 void setInsertPoint(VPBasicBlock *TheBB) {
130 assert(TheBB && "Attempting to set a null insert point");
131 BB = TheBB;
132 InsertPt = BB->end();
133 }
134
135 /// This specifies that created instructions should be inserted at the
136 /// specified point.
137 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
138 BB = TheBB;
139 InsertPt = IP;
140 }
141
142 /// This specifies that created instructions should be inserted at the
143 /// specified point.
144 void setInsertPoint(VPRecipeBase *IP) {
145 BB = IP->getParent();
146 InsertPt = IP->getIterator();
147 }
148
149 /// Insert \p R at the current insertion point.
150 void insert(VPRecipeBase *R) { BB->insert(Recipe: R, InsertPt); }
151
152 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
153 /// its underlying Instruction.
154 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
155 Instruction *Inst = nullptr,
156 const VPIRFlags &Flags = {},
157 const VPIRMetadata &MD = {},
158 DebugLoc DL = DebugLoc::getUnknown(),
159 const Twine &Name = "") {
160 VPInstruction *NewVPInst = tryInsertInstruction(
161 R: new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
162 NewVPInst->setUnderlyingValue(Inst);
163 return NewVPInst;
164 }
165 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
166 DebugLoc DL, const Twine &Name = "") {
167 return createInstruction(Opcode, Operands, MD: {}, DL, Name);
168 }
169 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
170 const VPIRFlags &Flags,
171 DebugLoc DL = DebugLoc::getUnknown(),
172 const Twine &Name = "") {
173 return tryInsertInstruction(
174 R: new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
175 }
176
177 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
178 Type *ResultTy, const VPIRFlags &Flags = {},
179 DebugLoc DL = DebugLoc::getUnknown(),
180 const Twine &Name = "") {
181 return tryInsertInstruction(R: new VPInstructionWithType(
182 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
183 }
184
185 VPInstruction *createOverflowingOp(
186 unsigned Opcode, ArrayRef<VPValue *> Operands,
187 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
188 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
189 return tryInsertInstruction(
190 R: new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
191 }
192
193 VPInstruction *createNot(VPValue *Operand,
194 DebugLoc DL = DebugLoc::getUnknown(),
195 const Twine &Name = "") {
196 return createInstruction(Opcode: VPInstruction::Not, Operands: {Operand}, MD: {}, DL, Name);
197 }
198
199 VPInstruction *createAnd(VPValue *LHS, VPValue *RHS,
200 DebugLoc DL = DebugLoc::getUnknown(),
201 const Twine &Name = "") {
202 return createInstruction(Opcode: Instruction::BinaryOps::And, Operands: {LHS, RHS}, MD: {}, DL,
203 Name);
204 }
205
206 VPInstruction *createOr(VPValue *LHS, VPValue *RHS,
207 DebugLoc DL = DebugLoc::getUnknown(),
208 const Twine &Name = "") {
209
210 return tryInsertInstruction(R: new VPInstruction(
211 Instruction::BinaryOps::Or, {LHS, RHS},
212 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
213 }
214
215 VPInstruction *
216 createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
217 const Twine &Name = "",
218 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
219 return createOverflowingOp(Opcode: Instruction::Add, Operands: {LHS, RHS}, WrapFlags, DL,
220 Name);
221 }
222
223 VPInstruction *
224 createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
225 const Twine &Name = "",
226 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
227 return createOverflowingOp(Opcode: Instruction::Sub, Operands: {LHS, RHS}, WrapFlags, DL,
228 Name);
229 }
230
231 VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS,
232 DebugLoc DL = DebugLoc::getUnknown(),
233 const Twine &Name = "") {
234 return createNaryOp(Opcode: VPInstruction::LogicalAnd, Operands: {LHS, RHS}, DL, Name);
235 }
236
237 VPInstruction *createLogicalOr(VPValue *LHS, VPValue *RHS,
238 DebugLoc DL = DebugLoc::getUnknown(),
239 const Twine &Name = "") {
240 return createNaryOp(Opcode: VPInstruction::LogicalOr, Operands: {LHS, RHS}, DL, Name);
241 }
242
243 VPInstruction *createSelect(VPValue *Cond, VPValue *TrueVal,
244 VPValue *FalseVal,
245 DebugLoc DL = DebugLoc::getUnknown(),
246 const Twine &Name = "",
247 const VPIRFlags &Flags = {}) {
248 return tryInsertInstruction(R: new VPInstruction(
249 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
250 }
251
252 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
253 /// and \p B.
254 VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
255 DebugLoc DL = DebugLoc::getUnknown(),
256 const Twine &Name = "") {
257 assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
258 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
259 return tryInsertInstruction(
260 R: new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
261 }
262
263 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
264 /// and \p B.
265 VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
266 DebugLoc DL = DebugLoc::getUnknown(),
267 const Twine &Name = "") {
268 assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
269 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
270 return tryInsertInstruction(
271 R: new VPInstruction(Instruction::FCmp, {A, B},
272 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
273 }
274
275 VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
276 DebugLoc DL = DebugLoc::getUnknown(),
277 const Twine &Name = "") {
278 return tryInsertInstruction(
279 R: new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
280 GEPNoWrapFlags::none(), {}, DL, Name));
281 }
282
283 VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
284 GEPNoWrapFlags GEPFlags,
285 DebugLoc DL = DebugLoc::getUnknown(),
286 const Twine &Name = "") {
287 return tryInsertInstruction(R: new VPInstruction(
288 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
289 }
290
291 VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
292 DebugLoc DL = DebugLoc::getUnknown(),
293 const Twine &Name = "") {
294 return tryInsertInstruction(
295 R: new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
296 GEPNoWrapFlags::none(), {}, DL, Name));
297 }
298
299 VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues,
300 DebugLoc DL = DebugLoc::getUnknown(),
301 const Twine &Name = "", const VPIRFlags &Flags = {}) {
302 return tryInsertInstruction(R: new VPPhi(IncomingValues, Flags, DL, Name));
303 }
304
305 VPValue *createElementCount(Type *Ty, ElementCount EC) {
306 VPlan &Plan = *getInsertBlock()->getPlan();
307 VPValue *RuntimeEC = Plan.getConstantInt(Ty, Val: EC.getKnownMinValue());
308 if (EC.isScalable()) {
309 VPValue *VScale = createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: Ty);
310 RuntimeEC = EC.getKnownMinValue() == 1
311 ? VScale
312 : createOverflowingOp(Opcode: Instruction::Mul,
313 Operands: {VScale, RuntimeEC}, WrapFlags: {true, false});
314 }
315 return RuntimeEC;
316 }
317
318 /// Convert the input value \p Current to the corresponding value of an
319 /// induction with \p Start and \p Step values, using \p Start + \p Current *
320 /// \p Step.
321 VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
322 FPMathOperator *FPBinOp, VPIRValue *Start,
323 VPValue *Current, VPValue *Step,
324 const Twine &Name = "") {
325 return tryInsertInstruction(
326 R: new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
327 }
328
329 VPInstructionWithType *createScalarLoad(Type *ResultTy, VPValue *Addr,
330 DebugLoc DL,
331 const VPIRMetadata &Metadata = {}) {
332 return tryInsertInstruction(R: new VPInstructionWithType(
333 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
334 }
335
336 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
337 Type *ResultTy, DebugLoc DL,
338 const VPIRMetadata &Metadata = {}) {
339 return tryInsertInstruction(R: new VPInstructionWithType(
340 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
341 DL));
342 }
343
344 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
345 Type *ResultTy, DebugLoc DL,
346 const VPIRFlags &Flags,
347 const VPIRMetadata &Metadata = {}) {
348 return tryInsertInstruction(
349 R: new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
350 }
351
352 VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
353 DebugLoc DL) {
354 if (ResultTy == SrcTy)
355 return Op;
356 Instruction::CastOps CastOp =
357 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
358 ? Instruction::Trunc
359 : Instruction::ZExt;
360 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
361 }
362
363 VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
364 DebugLoc DL) {
365 if (ResultTy == SrcTy)
366 return Op;
367 Instruction::CastOps CastOp =
368 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
369 ? Instruction::Trunc
370 : Instruction::SExt;
371 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
372 }
373
374 VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
375 Type *ResultTy) {
376 return tryInsertInstruction(R: new VPWidenCastRecipe(
377 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
378 }
379
380 VPScalarIVStepsRecipe *
381 createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
382 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
383 VPValue *VF, DebugLoc DL) {
384 return tryInsertInstruction(R: new VPScalarIVStepsRecipe(
385 IV, Step, VF, InductionOpcode,
386 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
387 }
388
389 VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr) {
390 return tryInsertInstruction(R: new VPExpandSCEVRecipe(Expr));
391 }
392
393 //===--------------------------------------------------------------------===//
394 // RAII helpers.
395 //===--------------------------------------------------------------------===//
396
397 /// RAII object that stores the current insertion point and restores it when
398 /// the object is destroyed.
399 class InsertPointGuard {
400 VPBuilder &Builder;
401 VPBasicBlock *Block;
402 VPBasicBlock::iterator Point;
403
404 public:
405 InsertPointGuard(VPBuilder &B)
406 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
407
408 InsertPointGuard(const InsertPointGuard &) = delete;
409 InsertPointGuard &operator=(const InsertPointGuard &) = delete;
410
411 ~InsertPointGuard() { Builder.restoreIP(IP: VPInsertPoint(Block, Point)); }
412 };
413};
414
415/// TODO: The following VectorizationFactor was pulled out of
416/// LoopVectorizationCostModel class. LV also deals with
417/// VectorizerParams::VectorizationFactor.
418/// We need to streamline them.
419
420/// Information about vectorization costs.
421struct VectorizationFactor {
422 /// Vector width with best cost.
423 ElementCount Width;
424
425 /// Cost of the loop with that width.
426 InstructionCost Cost;
427
428 /// Cost of the scalar loop.
429 InstructionCost ScalarCost;
430
431 /// The minimum trip count required to make vectorization profitable, e.g. due
432 /// to runtime checks.
433 ElementCount MinProfitableTripCount;
434
435 VectorizationFactor(ElementCount Width, InstructionCost Cost,
436 InstructionCost ScalarCost)
437 : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
438
439 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
440 static VectorizationFactor Disabled() {
441 return {ElementCount::getFixed(MinVal: 1), 0, 0};
442 }
443
444 bool operator==(const VectorizationFactor &rhs) const {
445 return Width == rhs.Width && Cost == rhs.Cost;
446 }
447
448 bool operator!=(const VectorizationFactor &rhs) const {
449 return !(*this == rhs);
450 }
451};
452
453/// A class that represents two vectorization factors (initialized with 0 by
454/// default). One for fixed-width vectorization and one for scalable
455/// vectorization. This can be used by the vectorizer to choose from a range of
456/// fixed and/or scalable VFs in order to find the most cost-effective VF to
457/// vectorize with.
458struct FixedScalableVFPair {
459 ElementCount FixedVF;
460 ElementCount ScalableVF;
461
462 FixedScalableVFPair()
463 : FixedVF(ElementCount::getFixed(MinVal: 0)),
464 ScalableVF(ElementCount::getScalable(MinVal: 0)) {}
465 FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
466 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
467 }
468 FixedScalableVFPair(const ElementCount &FixedVF,
469 const ElementCount &ScalableVF)
470 : FixedVF(FixedVF), ScalableVF(ScalableVF) {
471 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
472 "Invalid scalable properties");
473 }
474
475 static FixedScalableVFPair getNone() { return FixedScalableVFPair(); }
476
477 /// \return true if either fixed- or scalable VF is non-zero.
478 explicit operator bool() const { return FixedVF || ScalableVF; }
479
480 /// \return true if either fixed- or scalable VF is a valid vector VF.
481 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
482};
483
484/// Planner drives the vectorization process after having passed
485/// Legality checks.
486class LoopVectorizationPlanner {
487 /// The loop that we evaluate.
488 Loop *OrigLoop;
489
490 /// Loop Info analysis.
491 LoopInfo *LI;
492
493 /// The dominator tree.
494 DominatorTree *DT;
495
496 /// Target Library Info.
497 const TargetLibraryInfo *TLI;
498
499 /// Target Transform Info.
500 const TargetTransformInfo &TTI;
501
502 /// The legality analysis.
503 LoopVectorizationLegality *Legal;
504
505 /// The profitability analysis.
506 LoopVectorizationCostModel &CM;
507
508 /// The interleaved access analysis.
509 InterleavedAccessInfo &IAI;
510
511 PredicatedScalarEvolution &PSE;
512
513 const LoopVectorizeHints &Hints;
514
515 OptimizationRemarkEmitter *ORE;
516
517 SmallVector<VPlanPtr, 4> VPlans;
518
519 /// Profitable vector factors.
520 SmallVector<VectorizationFactor, 8> ProfitableVFs;
521
522 /// A builder used to construct the current plan.
523 VPBuilder Builder;
524
525 /// Computes the cost of \p Plan for vectorization factor \p VF.
526 ///
527 /// The current implementation requires access to the
528 /// LoopVectorizationLegality to handle inductions and reductions, which is
529 /// why it is kept separate from the VPlan-only cost infrastructure.
530 ///
531 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
532 /// been retired.
533 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
534
535 /// Precompute costs for certain instructions using the legacy cost model. The
536 /// function is used to bring up the VPlan-based cost model to initially avoid
537 /// taking different decisions due to inaccuracies in the legacy cost model.
538 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
539 VPCostContext &CostCtx) const;
540
541public:
542 LoopVectorizationPlanner(
543 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
544 const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
545 LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
546 PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
547 OptimizationRemarkEmitter *ORE)
548 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
549 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
550
551 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
552 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
553 /// interleaving should be avoided up-front, no plans are generated.
554 void plan(ElementCount UserVF, unsigned UserIC);
555
556 /// Use the VPlan-native path to plan how to best vectorize, return the best
557 /// VF and its cost.
558 VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
559
560 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
561 /// for each VF.
562 VPlan &getPlanFor(ElementCount VF) const;
563
564 /// Compute and return the most profitable vectorization factor. Also collect
565 /// all profitable VFs in ProfitableVFs.
566 VectorizationFactor computeBestVF();
567
568 /// \return The desired interleave count.
569 /// If interleave count has been specified by metadata it will be returned.
570 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
571 /// are the selected vectorization factor and the cost of the selected VF.
572 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
573 InstructionCost LoopCost);
574
575 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
576 /// according to the best selected \p VF and \p UF.
577 ///
578 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
579 /// fixed.
580 ///
581 /// Returns a mapping of SCEVs to their expanded IR values.
582 /// Note that this is a temporary workaround needed due to the current
583 /// epilogue handling.
584 enum class EpilogueVectorizationKind {
585 None, ///< Not part of epilogue vectorization.
586 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
587 Epilogue ///< Vectorizing the epilogue loop.
588 };
589 DenseMap<const SCEV *, Value *>
590 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
591 InnerLoopVectorizer &LB, DominatorTree *DT,
592 EpilogueVectorizationKind EpilogueVecKind =
593 EpilogueVectorizationKind::None);
594
595#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
596 void printPlans(raw_ostream &O);
597#endif
598
599 /// Look through the existing plans and return true if we have one with
600 /// vectorization factor \p VF.
601 bool hasPlanWithVF(ElementCount VF) const {
602 return any_of(Range: VPlans,
603 P: [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
604 }
605
606 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
607 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
608 /// returned value holds for the entire \p Range.
609 static bool
610 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
611 VFRange &Range);
612
613 /// \return The most profitable vectorization factor and the cost of that VF
614 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
615 /// epilogue vectorization is not supported for the loop.
616 VectorizationFactor selectEpilogueVectorizationFactor(ElementCount MainLoopVF,
617 unsigned IC);
618
619 /// Emit remarks for recipes with invalid costs in the available VPlans.
620 void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
621
622 /// Create a check to \p Plan to see if the vector loop should be executed
623 /// based on its trip count.
624 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
625 ElementCount MinProfitableTripCount) const;
626
627 /// Attach the runtime checks of \p RTChecks to \p Plan.
628 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
629 bool HasBranchWeights) const;
630
631 /// Update loop metadata and profile info for both the scalar remainder loop
632 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
633 /// loop on the vector loop and replaces vectorizer-specific metadata. The
634 /// loop ID of the original loop \p OrigLoopID must be passed, together with
635 /// the average trip count and invocation weight of the original loop (\p
636 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
637 /// cannot be retrieved after the plan has been executed, as the original loop
638 /// may have been removed.
639 void updateLoopMetadataAndProfileInfo(
640 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
641 bool VectorizingEpilogue, MDNode *OrigLoopID,
642 std::optional<unsigned> OrigAverageTripCount,
643 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
644 bool DisableRuntimeUnroll);
645
646protected:
647 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
648 /// according to the information gathered by Legal when it checked if it is
649 /// legal to vectorize the loop.
650 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
651
652private:
653 /// Build a VPlan according to the information gathered by Legal. \return a
654 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
655 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
656 /// the input range, set the largest included VF to the maximum VF for which
657 /// no plan could be built.
658 VPlanPtr tryToBuildVPlan(VFRange &Range);
659
660 /// Build a VPlan using VPRecipes according to the information gather by
661 /// Legal. This method is only used for the legacy inner loop vectorizer.
662 /// \p Range's largest included VF is restricted to the maximum VF the
663 /// returned VPlan is valid for. If no VPlan can be built for the input range,
664 /// set the largest included VF to the maximum VF for which no plan could be
665 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
666 /// is a plain CFG VPlan wrapping the original scalar loop.
667 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
668 LoopVersioning *LVer);
669
670 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
671 /// according to the information gathered by Legal when it checked if it is
672 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
673 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
674
675 /// Add recipes to compute the final reduction result (ComputeAnyOfResult,
676 /// ComputeReductionResult depending on the reduction) in
677 /// the middle block. Selects are introduced for reductions between the phi
678 /// and users outside the vector region when folding the tail.
679 void addReductionResultComputation(VPlanPtr &Plan,
680 VPRecipeBuilder &RecipeBuilder,
681 ElementCount MinVF);
682
683#ifndef NDEBUG
684 /// \return The most profitable vectorization factor for the available VPlans
685 /// and the cost of that VF.
686 /// This is now only used to verify the decisions by the new VPlan-based
687 /// cost-model and will be retired once the VPlan-based cost-model is
688 /// stabilized.
689 VectorizationFactor selectVectorizationFactor();
690#endif
691
692 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
693 /// that of B.
694 bool isMoreProfitable(const VectorizationFactor &A,
695 const VectorizationFactor &B, bool HasTail,
696 bool IsEpilogue = false) const;
697
698 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
699 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
700 bool isMoreProfitable(const VectorizationFactor &A,
701 const VectorizationFactor &B,
702 const unsigned MaxTripCount, bool HasTail,
703 bool IsEpilogue = false) const;
704
705 /// Determines if we have the infrastructure to vectorize the loop and its
706 /// epilogue, assuming the main loop is vectorized by \p VF.
707 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
708};
709
710} // namespace llvm
711
712#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
713