1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/Support/InstructionCost.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
39class LoopVectorizationLegality;
40class LoopVectorizationCostModel;
41class PredicatedScalarEvolution;
42class LoopVectorizeHints;
43class LoopVersioning;
44class OptimizationRemarkEmitter;
45class TargetTransformInfo;
46class TargetLibraryInfo;
47class VPRecipeBuilder;
48struct VFRange;
49
50extern cl::opt<bool> EnableVPlanNativePath;
51extern cl::opt<unsigned> ForceTargetInstructionCost;
52
53/// VPlan-based builder utility analogous to IRBuilder.
54class VPBuilder {
55 VPBasicBlock *BB = nullptr;
56 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
57
58 /// Insert \p VPI in BB at InsertPt if BB is set.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(Recipe: R, InsertPt);
62 return R;
63 }
64
65 VPInstruction *createInstruction(unsigned Opcode,
66 ArrayRef<VPValue *> Operands,
67 const VPIRMetadata &MD, DebugLoc DL,
68 const Twine &Name = "") {
69 return tryInsertInstruction(
70 R: new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
71 }
72
73public:
74 VPBuilder() = default;
75 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
76 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
77 VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
78 setInsertPoint(TheBB, IP);
79 }
80
81 /// Clear the insertion point: created instructions will not be inserted into
82 /// a block.
83 void clearInsertionPoint() {
84 BB = nullptr;
85 InsertPt = VPBasicBlock::iterator();
86 }
87
88 VPBasicBlock *getInsertBlock() const { return BB; }
89 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
90
91 /// Create a VPBuilder to insert after \p R.
92 static VPBuilder getToInsertAfter(VPRecipeBase *R) {
93 VPBuilder B;
94 B.setInsertPoint(TheBB: R->getParent(), IP: std::next(x: R->getIterator()));
95 return B;
96 }
97
98 /// InsertPoint - A saved insertion point.
99 class VPInsertPoint {
100 VPBasicBlock *Block = nullptr;
101 VPBasicBlock::iterator Point;
102
103 public:
104 /// Creates a new insertion point which doesn't point to anything.
105 VPInsertPoint() = default;
106
107 /// Creates a new insertion point at the given location.
108 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
109 : Block(InsertBlock), Point(InsertPoint) {}
110
111 /// Returns true if this insert point is set.
112 bool isSet() const { return Block != nullptr; }
113
114 VPBasicBlock *getBlock() const { return Block; }
115 VPBasicBlock::iterator getPoint() const { return Point; }
116 };
117
118 /// Sets the current insert point to a previously-saved location.
119 void restoreIP(VPInsertPoint IP) {
120 if (IP.isSet())
121 setInsertPoint(TheBB: IP.getBlock(), IP: IP.getPoint());
122 else
123 clearInsertionPoint();
124 }
125
126 /// This specifies that created VPInstructions should be appended to the end
127 /// of the specified block.
128 void setInsertPoint(VPBasicBlock *TheBB) {
129 assert(TheBB && "Attempting to set a null insert point");
130 BB = TheBB;
131 InsertPt = BB->end();
132 }
133
134 /// This specifies that created instructions should be inserted at the
135 /// specified point.
136 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
137 BB = TheBB;
138 InsertPt = IP;
139 }
140
141 /// This specifies that created instructions should be inserted at the
142 /// specified point.
143 void setInsertPoint(VPRecipeBase *IP) {
144 BB = IP->getParent();
145 InsertPt = IP->getIterator();
146 }
147
148 /// Insert \p R at the current insertion point.
149 void insert(VPRecipeBase *R) { BB->insert(Recipe: R, InsertPt); }
150
151 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
152 /// its underlying Instruction.
153 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
154 Instruction *Inst = nullptr,
155 const VPIRFlags &Flags = {},
156 const VPIRMetadata &MD = {},
157 DebugLoc DL = DebugLoc::getUnknown(),
158 const Twine &Name = "") {
159 VPInstruction *NewVPInst = tryInsertInstruction(
160 R: new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
161 NewVPInst->setUnderlyingValue(Inst);
162 return NewVPInst;
163 }
164 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
165 DebugLoc DL, const Twine &Name = "") {
166 return createInstruction(Opcode, Operands, MD: {}, DL, Name);
167 }
168 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
169 const VPIRFlags &Flags,
170 DebugLoc DL = DebugLoc::getUnknown(),
171 const Twine &Name = "") {
172 return tryInsertInstruction(
173 R: new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
174 }
175
176 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
177 Type *ResultTy, const VPIRFlags &Flags = {},
178 DebugLoc DL = DebugLoc::getUnknown(),
179 const Twine &Name = "") {
180 return tryInsertInstruction(R: new VPInstructionWithType(
181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182 }
183
184 VPInstruction *createOverflowingOp(
185 unsigned Opcode, ArrayRef<VPValue *> Operands,
186 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
187 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
188 return tryInsertInstruction(
189 R: new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
190 }
191
192 VPInstruction *createNot(VPValue *Operand,
193 DebugLoc DL = DebugLoc::getUnknown(),
194 const Twine &Name = "") {
195 return createInstruction(Opcode: VPInstruction::Not, Operands: {Operand}, MD: {}, DL, Name);
196 }
197
198 VPInstruction *createAnd(VPValue *LHS, VPValue *RHS,
199 DebugLoc DL = DebugLoc::getUnknown(),
200 const Twine &Name = "") {
201 return createInstruction(Opcode: Instruction::BinaryOps::And, Operands: {LHS, RHS}, MD: {}, DL,
202 Name);
203 }
204
205 VPInstruction *createOr(VPValue *LHS, VPValue *RHS,
206 DebugLoc DL = DebugLoc::getUnknown(),
207 const Twine &Name = "") {
208
209 return tryInsertInstruction(R: new VPInstruction(
210 Instruction::BinaryOps::Or, {LHS, RHS},
211 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
212 }
213
214 VPInstruction *
215 createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
216 const Twine &Name = "",
217 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
218 return createOverflowingOp(Opcode: Instruction::Add, Operands: {LHS, RHS}, WrapFlags, DL,
219 Name);
220 }
221
222 VPInstruction *
223 createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
224 const Twine &Name = "",
225 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
226 return createOverflowingOp(Opcode: Instruction::Sub, Operands: {LHS, RHS}, WrapFlags, DL,
227 Name);
228 }
229
230 VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS,
231 DebugLoc DL = DebugLoc::getUnknown(),
232 const Twine &Name = "") {
233 return createNaryOp(Opcode: VPInstruction::LogicalAnd, Operands: {LHS, RHS}, DL, Name);
234 }
235
236 VPInstruction *
237 createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
238 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
239 std::optional<FastMathFlags> FMFs = std::nullopt) {
240 if (!FMFs)
241 return createNaryOp(Opcode: Instruction::Select, Operands: {Cond, TrueVal, FalseVal}, DL,
242 Name);
243 return tryInsertInstruction(R: new VPInstruction(
244 Instruction::Select, {Cond, TrueVal, FalseVal}, *FMFs, {}, DL, Name));
245 }
246
247 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
248 /// and \p B.
249 VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
250 DebugLoc DL = DebugLoc::getUnknown(),
251 const Twine &Name = "") {
252 assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
253 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
254 return tryInsertInstruction(
255 R: new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
256 }
257
258 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
259 /// and \p B.
260 VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
261 DebugLoc DL = DebugLoc::getUnknown(),
262 const Twine &Name = "") {
263 assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
264 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
265 return tryInsertInstruction(
266 R: new VPInstruction(Instruction::FCmp, {A, B},
267 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
268 }
269
270 VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
271 DebugLoc DL = DebugLoc::getUnknown(),
272 const Twine &Name = "") {
273 return tryInsertInstruction(
274 R: new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
275 GEPNoWrapFlags::none(), {}, DL, Name));
276 }
277
278 VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
279 GEPNoWrapFlags GEPFlags,
280 DebugLoc DL = DebugLoc::getUnknown(),
281 const Twine &Name = "") {
282 return tryInsertInstruction(R: new VPInstruction(
283 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
284 }
285
286 VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
287 DebugLoc DL = DebugLoc::getUnknown(),
288 const Twine &Name = "") {
289 return tryInsertInstruction(
290 R: new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
291 GEPNoWrapFlags::none(), {}, DL, Name));
292 }
293
294 VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
295 const Twine &Name = "") {
296 return tryInsertInstruction(R: new VPPhi(IncomingValues, DL, Name));
297 }
298
299 VPValue *createElementCount(Type *Ty, ElementCount EC) {
300 VPlan &Plan = *getInsertBlock()->getPlan();
301 VPValue *RuntimeEC = Plan.getConstantInt(Ty, Val: EC.getKnownMinValue());
302 if (EC.isScalable()) {
303 VPValue *VScale = createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: Ty);
304 RuntimeEC = EC.getKnownMinValue() == 1
305 ? VScale
306 : createOverflowingOp(Opcode: Instruction::Mul,
307 Operands: {VScale, RuntimeEC}, WrapFlags: {true, false});
308 }
309 return RuntimeEC;
310 }
311
312 /// Convert the input value \p Current to the corresponding value of an
313 /// induction with \p Start and \p Step values, using \p Start + \p Current *
314 /// \p Step.
315 VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
316 FPMathOperator *FPBinOp, VPIRValue *Start,
317 VPValue *Current, VPValue *Step,
318 const Twine &Name = "") {
319 return tryInsertInstruction(
320 R: new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
321 }
322
323 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
324 Type *ResultTy, DebugLoc DL,
325 const VPIRMetadata &Metadata = {}) {
326 return tryInsertInstruction(R: new VPInstructionWithType(
327 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
328 DL));
329 }
330
331 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
332 Type *ResultTy, DebugLoc DL,
333 const VPIRFlags &Flags,
334 const VPIRMetadata &Metadata = {}) {
335 return tryInsertInstruction(
336 R: new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
337 }
338
339 VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
340 DebugLoc DL) {
341 if (ResultTy == SrcTy)
342 return Op;
343 Instruction::CastOps CastOp =
344 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
345 ? Instruction::Trunc
346 : Instruction::ZExt;
347 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
348 }
349
350 VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
351 DebugLoc DL) {
352 if (ResultTy == SrcTy)
353 return Op;
354 Instruction::CastOps CastOp =
355 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
356 ? Instruction::Trunc
357 : Instruction::SExt;
358 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
359 }
360
361 VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
362 Type *ResultTy) {
363 return tryInsertInstruction(R: new VPWidenCastRecipe(
364 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
365 }
366
367 VPScalarIVStepsRecipe *
368 createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
369 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
370 VPValue *VF, DebugLoc DL) {
371 return tryInsertInstruction(R: new VPScalarIVStepsRecipe(
372 IV, Step, VF, InductionOpcode,
373 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
374 }
375
376 VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr) {
377 return tryInsertInstruction(R: new VPExpandSCEVRecipe(Expr));
378 }
379
380 //===--------------------------------------------------------------------===//
381 // RAII helpers.
382 //===--------------------------------------------------------------------===//
383
384 /// RAII object that stores the current insertion point and restores it when
385 /// the object is destroyed.
386 class InsertPointGuard {
387 VPBuilder &Builder;
388 VPBasicBlock *Block;
389 VPBasicBlock::iterator Point;
390
391 public:
392 InsertPointGuard(VPBuilder &B)
393 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
394
395 InsertPointGuard(const InsertPointGuard &) = delete;
396 InsertPointGuard &operator=(const InsertPointGuard &) = delete;
397
398 ~InsertPointGuard() { Builder.restoreIP(IP: VPInsertPoint(Block, Point)); }
399 };
400};
401
402/// TODO: The following VectorizationFactor was pulled out of
403/// LoopVectorizationCostModel class. LV also deals with
404/// VectorizerParams::VectorizationFactor.
405/// We need to streamline them.
406
407/// Information about vectorization costs.
408struct VectorizationFactor {
409 /// Vector width with best cost.
410 ElementCount Width;
411
412 /// Cost of the loop with that width.
413 InstructionCost Cost;
414
415 /// Cost of the scalar loop.
416 InstructionCost ScalarCost;
417
418 /// The minimum trip count required to make vectorization profitable, e.g. due
419 /// to runtime checks.
420 ElementCount MinProfitableTripCount;
421
422 VectorizationFactor(ElementCount Width, InstructionCost Cost,
423 InstructionCost ScalarCost)
424 : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
425
426 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
427 static VectorizationFactor Disabled() {
428 return {ElementCount::getFixed(MinVal: 1), 0, 0};
429 }
430
431 bool operator==(const VectorizationFactor &rhs) const {
432 return Width == rhs.Width && Cost == rhs.Cost;
433 }
434
435 bool operator!=(const VectorizationFactor &rhs) const {
436 return !(*this == rhs);
437 }
438};
439
440/// A class that represents two vectorization factors (initialized with 0 by
441/// default). One for fixed-width vectorization and one for scalable
442/// vectorization. This can be used by the vectorizer to choose from a range of
443/// fixed and/or scalable VFs in order to find the most cost-effective VF to
444/// vectorize with.
445struct FixedScalableVFPair {
446 ElementCount FixedVF;
447 ElementCount ScalableVF;
448
449 FixedScalableVFPair()
450 : FixedVF(ElementCount::getFixed(MinVal: 0)),
451 ScalableVF(ElementCount::getScalable(MinVal: 0)) {}
452 FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
453 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
454 }
455 FixedScalableVFPair(const ElementCount &FixedVF,
456 const ElementCount &ScalableVF)
457 : FixedVF(FixedVF), ScalableVF(ScalableVF) {
458 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
459 "Invalid scalable properties");
460 }
461
462 static FixedScalableVFPair getNone() { return FixedScalableVFPair(); }
463
464 /// \return true if either fixed- or scalable VF is non-zero.
465 explicit operator bool() const { return FixedVF || ScalableVF; }
466
467 /// \return true if either fixed- or scalable VF is a valid vector VF.
468 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
469};
470
471/// Planner drives the vectorization process after having passed
472/// Legality checks.
473class LoopVectorizationPlanner {
474 /// The loop that we evaluate.
475 Loop *OrigLoop;
476
477 /// Loop Info analysis.
478 LoopInfo *LI;
479
480 /// The dominator tree.
481 DominatorTree *DT;
482
483 /// Target Library Info.
484 const TargetLibraryInfo *TLI;
485
486 /// Target Transform Info.
487 const TargetTransformInfo &TTI;
488
489 /// The legality analysis.
490 LoopVectorizationLegality *Legal;
491
492 /// The profitability analysis.
493 LoopVectorizationCostModel &CM;
494
495 /// The interleaved access analysis.
496 InterleavedAccessInfo &IAI;
497
498 PredicatedScalarEvolution &PSE;
499
500 const LoopVectorizeHints &Hints;
501
502 OptimizationRemarkEmitter *ORE;
503
504 SmallVector<VPlanPtr, 4> VPlans;
505
506 /// Profitable vector factors.
507 SmallVector<VectorizationFactor, 8> ProfitableVFs;
508
509 /// A builder used to construct the current plan.
510 VPBuilder Builder;
511
512 /// Computes the cost of \p Plan for vectorization factor \p VF.
513 ///
514 /// The current implementation requires access to the
515 /// LoopVectorizationLegality to handle inductions and reductions, which is
516 /// why it is kept separate from the VPlan-only cost infrastructure.
517 ///
518 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
519 /// been retired.
520 InstructionCost cost(VPlan &Plan, ElementCount VF) const;
521
522 /// Precompute costs for certain instructions using the legacy cost model. The
523 /// function is used to bring up the VPlan-based cost model to initially avoid
524 /// taking different decisions due to inaccuracies in the legacy cost model.
525 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
526 VPCostContext &CostCtx) const;
527
528public:
529 LoopVectorizationPlanner(
530 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
531 const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
532 LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
533 PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
534 OptimizationRemarkEmitter *ORE)
535 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
536 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
537
538 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
539 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
540 /// interleaving should be avoided up-front, no plans are generated.
541 void plan(ElementCount UserVF, unsigned UserIC);
542
543 /// Use the VPlan-native path to plan how to best vectorize, return the best
544 /// VF and its cost.
545 VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
546
547 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
548 /// for each VF.
549 VPlan &getPlanFor(ElementCount VF) const;
550
551 /// Compute and return the most profitable vectorization factor. Also collect
552 /// all profitable VFs in ProfitableVFs.
553 VectorizationFactor computeBestVF();
554
555 /// \return The desired interleave count.
556 /// If interleave count has been specified by metadata it will be returned.
557 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
558 /// are the selected vectorization factor and the cost of the selected VF.
559 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
560 InstructionCost LoopCost);
561
562 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
563 /// according to the best selected \p VF and \p UF.
564 ///
565 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
566 /// epilogue vector loop. It should be removed once the re-use issue has been
567 /// fixed.
568 ///
569 /// Returns a mapping of SCEVs to their expanded IR values.
570 /// Note that this is a temporary workaround needed due to the current
571 /// epilogue handling.
572 DenseMap<const SCEV *, Value *> executePlan(ElementCount VF, unsigned UF,
573 VPlan &BestPlan,
574 InnerLoopVectorizer &LB,
575 DominatorTree *DT,
576 bool VectorizingEpilogue);
577
578#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
579 void printPlans(raw_ostream &O);
580#endif
581
582 /// Look through the existing plans and return true if we have one with
583 /// vectorization factor \p VF.
584 bool hasPlanWithVF(ElementCount VF) const {
585 return any_of(Range: VPlans,
586 P: [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
587 }
588
589 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
590 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
591 /// returned value holds for the entire \p Range.
592 static bool
593 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
594 VFRange &Range);
595
596 /// \return The most profitable vectorization factor and the cost of that VF
597 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
598 /// epilogue vectorization is not supported for the loop.
599 VectorizationFactor
600 selectEpilogueVectorizationFactor(const ElementCount MainLoopVF, unsigned IC);
601
602 /// Emit remarks for recipes with invalid costs in the available VPlans.
603 void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
604
605 /// Create a check to \p Plan to see if the vector loop should be executed
606 /// based on its trip count.
607 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
608 ElementCount MinProfitableTripCount) const;
609
610 /// Update loop metadata and profile info for both the scalar remainder loop
611 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
612 /// loop on the vector loop and replaces vectorizer-specific metadata. The
613 /// loop ID of the original loop \p OrigLoopID must be passed, together with
614 /// the average trip count and invocation weight of the original loop (\p
615 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
616 /// cannot be retrieved after the plan has been executed, as the original loop
617 /// may have been removed.
618 void updateLoopMetadataAndProfileInfo(
619 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
620 bool VectorizingEpilogue, MDNode *OrigLoopID,
621 std::optional<unsigned> OrigAverageTripCount,
622 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
623 bool DisableRuntimeUnroll);
624
625protected:
626 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
627 /// according to the information gathered by Legal when it checked if it is
628 /// legal to vectorize the loop.
629 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
630
631private:
632 /// Build a VPlan according to the information gathered by Legal. \return a
633 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
634 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
635 /// the input range, set the largest included VF to the maximum VF for which
636 /// no plan could be built.
637 VPlanPtr tryToBuildVPlan(VFRange &Range);
638
639 /// Build a VPlan using VPRecipes according to the information gather by
640 /// Legal. This method is only used for the legacy inner loop vectorizer.
641 /// \p Range's largest included VF is restricted to the maximum VF the
642 /// returned VPlan is valid for. If no VPlan can be built for the input range,
643 /// set the largest included VF to the maximum VF for which no plan could be
644 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
645 /// is a plain CFG VPlan wrapping the original scalar loop.
646 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
647 LoopVersioning *LVer);
648
649 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
650 /// according to the information gathered by Legal when it checked if it is
651 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
652 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
653
654 /// Add recipes to compute the final reduction result (ComputeFindIVResult,
655 /// ComputeAnyOfResult, ComputeReductionResult depending on the reduction) in
656 /// the middle block. Selects are introduced for reductions between the phi
657 /// and users outside the vector region when folding the tail.
658 void addReductionResultComputation(VPlanPtr &Plan,
659 VPRecipeBuilder &RecipeBuilder,
660 ElementCount MinVF);
661
662 /// Attach the runtime checks of \p RTChecks to \p Plan.
663 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
664 bool HasBranchWeights) const;
665
666#ifndef NDEBUG
667 /// \return The most profitable vectorization factor for the available VPlans
668 /// and the cost of that VF.
669 /// This is now only used to verify the decisions by the new VPlan-based
670 /// cost-model and will be retired once the VPlan-based cost-model is
671 /// stabilized.
672 VectorizationFactor selectVectorizationFactor();
673#endif
674
675 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
676 /// that of B.
677 bool isMoreProfitable(const VectorizationFactor &A,
678 const VectorizationFactor &B, bool HasTail,
679 bool IsEpilogue = false) const;
680
681 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
682 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
683 bool isMoreProfitable(const VectorizationFactor &A,
684 const VectorizationFactor &B,
685 const unsigned MaxTripCount, bool HasTail,
686 bool IsEpilogue = false) const;
687
688 /// Determines if we have the infrastructure to vectorize the loop and its
689 /// epilogue, assuming the main loop is vectorized by \p VF.
690 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
691};
692
693} // namespace llvm
694
695#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
696