1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/Support/InstructionCost.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
39class LoopVectorizationLegality;
40class LoopVectorizationCostModel;
41class PredicatedScalarEvolution;
42class LoopVectorizeHints;
43class LoopVersioning;
44class OptimizationRemarkEmitter;
45class TargetTransformInfo;
46class TargetLibraryInfo;
47class VPRecipeBuilder;
48struct VFRange;
49
50extern cl::opt<bool> EnableVPlanNativePath;
51extern cl::opt<unsigned> ForceTargetInstructionCost;
52
53/// VPlan-based builder utility analogous to IRBuilder.
54class VPBuilder {
55 VPBasicBlock *BB = nullptr;
56 VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
57
58 /// Insert \p VPI in BB at InsertPt if BB is set.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(Recipe: R, InsertPt);
62 return R;
63 }
64
65 VPInstruction *createInstruction(unsigned Opcode,
66 ArrayRef<VPValue *> Operands,
67 const VPIRMetadata &MD, DebugLoc DL,
68 const Twine &Name = "") {
69 return tryInsertInstruction(
70 R: new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
71 }
72
73public:
74 VPBuilder() = default;
75 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
76 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
77 VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
78 setInsertPoint(TheBB, IP);
79 }
80
81 /// Clear the insertion point: created instructions will not be inserted into
82 /// a block.
83 void clearInsertionPoint() {
84 BB = nullptr;
85 InsertPt = VPBasicBlock::iterator();
86 }
87
88 VPBasicBlock *getInsertBlock() const { return BB; }
89 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
90
91 /// Create a VPBuilder to insert after \p R.
92 static VPBuilder getToInsertAfter(VPRecipeBase *R) {
93 VPBuilder B;
94 B.setInsertPoint(TheBB: R->getParent(), IP: std::next(x: R->getIterator()));
95 return B;
96 }
97
98 /// InsertPoint - A saved insertion point.
99 class VPInsertPoint {
100 VPBasicBlock *Block = nullptr;
101 VPBasicBlock::iterator Point;
102
103 public:
104 /// Creates a new insertion point which doesn't point to anything.
105 VPInsertPoint() = default;
106
107 /// Creates a new insertion point at the given location.
108 VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
109 : Block(InsertBlock), Point(InsertPoint) {}
110
111 /// Returns true if this insert point is set.
112 bool isSet() const { return Block != nullptr; }
113
114 VPBasicBlock *getBlock() const { return Block; }
115 VPBasicBlock::iterator getPoint() const { return Point; }
116 };
117
118 /// Sets the current insert point to a previously-saved location.
119 void restoreIP(VPInsertPoint IP) {
120 if (IP.isSet())
121 setInsertPoint(TheBB: IP.getBlock(), IP: IP.getPoint());
122 else
123 clearInsertionPoint();
124 }
125
126 /// This specifies that created VPInstructions should be appended to the end
127 /// of the specified block.
128 void setInsertPoint(VPBasicBlock *TheBB) {
129 assert(TheBB && "Attempting to set a null insert point");
130 BB = TheBB;
131 InsertPt = BB->end();
132 }
133
134 /// This specifies that created instructions should be inserted at the
135 /// specified point.
136 void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
137 BB = TheBB;
138 InsertPt = IP;
139 }
140
141 /// This specifies that created instructions should be inserted at the
142 /// specified point.
143 void setInsertPoint(VPRecipeBase *IP) {
144 BB = IP->getParent();
145 InsertPt = IP->getIterator();
146 }
147
148 /// Insert \p R at the current insertion point.
149 void insert(VPRecipeBase *R) { BB->insert(Recipe: R, InsertPt); }
150
151 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
152 /// its underlying Instruction.
153 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
154 Instruction *Inst = nullptr,
155 const VPIRFlags &Flags = {},
156 const VPIRMetadata &MD = {},
157 DebugLoc DL = DebugLoc::getUnknown(),
158 const Twine &Name = "") {
159 VPInstruction *NewVPInst = tryInsertInstruction(
160 R: new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
161 NewVPInst->setUnderlyingValue(Inst);
162 return NewVPInst;
163 }
164 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
165 DebugLoc DL, const Twine &Name = "") {
166 return createInstruction(Opcode, Operands, MD: {}, DL, Name);
167 }
168 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
169 const VPIRFlags &Flags,
170 DebugLoc DL = DebugLoc::getUnknown(),
171 const Twine &Name = "") {
172 return tryInsertInstruction(
173 R: new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
174 }
175
176 VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
177 Type *ResultTy, const VPIRFlags &Flags = {},
178 DebugLoc DL = DebugLoc::getUnknown(),
179 const Twine &Name = "") {
180 return tryInsertInstruction(R: new VPInstructionWithType(
181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182 }
183
184 VPInstruction *createOverflowingOp(
185 unsigned Opcode, ArrayRef<VPValue *> Operands,
186 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
187 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
188 return tryInsertInstruction(
189 R: new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
190 }
191
192 VPInstruction *createNot(VPValue *Operand,
193 DebugLoc DL = DebugLoc::getUnknown(),
194 const Twine &Name = "") {
195 return createInstruction(Opcode: VPInstruction::Not, Operands: {Operand}, MD: {}, DL, Name);
196 }
197
198 VPInstruction *createAnd(VPValue *LHS, VPValue *RHS,
199 DebugLoc DL = DebugLoc::getUnknown(),
200 const Twine &Name = "") {
201 return createInstruction(Opcode: Instruction::BinaryOps::And, Operands: {LHS, RHS}, MD: {}, DL,
202 Name);
203 }
204
205 VPInstruction *createOr(VPValue *LHS, VPValue *RHS,
206 DebugLoc DL = DebugLoc::getUnknown(),
207 const Twine &Name = "") {
208
209 return tryInsertInstruction(R: new VPInstruction(
210 Instruction::BinaryOps::Or, {LHS, RHS},
211 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
212 }
213
214 VPInstruction *
215 createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
216 const Twine &Name = "",
217 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
218 return createOverflowingOp(Opcode: Instruction::Add, Operands: {LHS, RHS}, WrapFlags, DL,
219 Name);
220 }
221
222 VPInstruction *
223 createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL = DebugLoc::getUnknown(),
224 const Twine &Name = "",
225 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
226 return createOverflowingOp(Opcode: Instruction::Sub, Operands: {LHS, RHS}, WrapFlags, DL,
227 Name);
228 }
229
230 VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS,
231 DebugLoc DL = DebugLoc::getUnknown(),
232 const Twine &Name = "") {
233 return createNaryOp(Opcode: VPInstruction::LogicalAnd, Operands: {LHS, RHS}, DL, Name);
234 }
235
236 VPInstruction *createLogicalOr(VPValue *LHS, VPValue *RHS,
237 DebugLoc DL = DebugLoc::getUnknown(),
238 const Twine &Name = "") {
239 return createNaryOp(Opcode: VPInstruction::LogicalOr, Operands: {LHS, RHS}, DL, Name);
240 }
241
242 VPInstruction *createSelect(VPValue *Cond, VPValue *TrueVal,
243 VPValue *FalseVal,
244 DebugLoc DL = DebugLoc::getUnknown(),
245 const Twine &Name = "",
246 const VPIRFlags &Flags = {}) {
247 return tryInsertInstruction(R: new VPInstruction(
248 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
249 }
250
251 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
252 /// and \p B.
253 VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
254 DebugLoc DL = DebugLoc::getUnknown(),
255 const Twine &Name = "") {
256 assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
257 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
258 return tryInsertInstruction(
259 R: new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
260 }
261
262 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
263 /// and \p B.
264 VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
265 DebugLoc DL = DebugLoc::getUnknown(),
266 const Twine &Name = "") {
267 assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
268 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
269 return tryInsertInstruction(
270 R: new VPInstruction(Instruction::FCmp, {A, B},
271 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
272 }
273
274 VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
275 DebugLoc DL = DebugLoc::getUnknown(),
276 const Twine &Name = "") {
277 return tryInsertInstruction(
278 R: new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
279 GEPNoWrapFlags::none(), {}, DL, Name));
280 }
281
282 VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
283 GEPNoWrapFlags GEPFlags,
284 DebugLoc DL = DebugLoc::getUnknown(),
285 const Twine &Name = "") {
286 return tryInsertInstruction(R: new VPInstruction(
287 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
288 }
289
290 VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
291 DebugLoc DL = DebugLoc::getUnknown(),
292 const Twine &Name = "") {
293 return tryInsertInstruction(
294 R: new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
295 GEPNoWrapFlags::none(), {}, DL, Name));
296 }
297
298 VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues,
299 DebugLoc DL = DebugLoc::getUnknown(),
300 const Twine &Name = "", const VPIRFlags &Flags = {}) {
301 return tryInsertInstruction(R: new VPPhi(IncomingValues, Flags, DL, Name));
302 }
303
304 VPValue *createElementCount(Type *Ty, ElementCount EC) {
305 VPlan &Plan = *getInsertBlock()->getPlan();
306 VPValue *RuntimeEC = Plan.getConstantInt(Ty, Val: EC.getKnownMinValue());
307 if (EC.isScalable()) {
308 VPValue *VScale = createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: Ty);
309 RuntimeEC = EC.getKnownMinValue() == 1
310 ? VScale
311 : createOverflowingOp(Opcode: Instruction::Mul,
312 Operands: {VScale, RuntimeEC}, WrapFlags: {true, false});
313 }
314 return RuntimeEC;
315 }
316
317 /// Convert the input value \p Current to the corresponding value of an
318 /// induction with \p Start and \p Step values, using \p Start + \p Current *
319 /// \p Step.
320 VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
321 FPMathOperator *FPBinOp, VPIRValue *Start,
322 VPValue *Current, VPValue *Step,
323 const Twine &Name = "") {
324 return tryInsertInstruction(
325 R: new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
326 }
327
328 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
329 Type *ResultTy, DebugLoc DL,
330 const VPIRMetadata &Metadata = {}) {
331 return tryInsertInstruction(R: new VPInstructionWithType(
332 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
333 DL));
334 }
335
336 VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
337 Type *ResultTy, DebugLoc DL,
338 const VPIRFlags &Flags,
339 const VPIRMetadata &Metadata = {}) {
340 return tryInsertInstruction(
341 R: new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
342 }
343
344 VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
345 DebugLoc DL) {
346 if (ResultTy == SrcTy)
347 return Op;
348 Instruction::CastOps CastOp =
349 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
350 ? Instruction::Trunc
351 : Instruction::ZExt;
352 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
353 }
354
355 VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
356 DebugLoc DL) {
357 if (ResultTy == SrcTy)
358 return Op;
359 Instruction::CastOps CastOp =
360 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
361 ? Instruction::Trunc
362 : Instruction::SExt;
363 return createScalarCast(Opcode: CastOp, Op, ResultTy, DL);
364 }
365
366 VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
367 Type *ResultTy) {
368 return tryInsertInstruction(R: new VPWidenCastRecipe(
369 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
370 }
371
372 VPScalarIVStepsRecipe *
373 createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
374 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
375 VPValue *VF, DebugLoc DL) {
376 return tryInsertInstruction(R: new VPScalarIVStepsRecipe(
377 IV, Step, VF, InductionOpcode,
378 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
379 }
380
381 VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr) {
382 return tryInsertInstruction(R: new VPExpandSCEVRecipe(Expr));
383 }
384
385 //===--------------------------------------------------------------------===//
386 // RAII helpers.
387 //===--------------------------------------------------------------------===//
388
389 /// RAII object that stores the current insertion point and restores it when
390 /// the object is destroyed.
391 class InsertPointGuard {
392 VPBuilder &Builder;
393 VPBasicBlock *Block;
394 VPBasicBlock::iterator Point;
395
396 public:
397 InsertPointGuard(VPBuilder &B)
398 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
399
400 InsertPointGuard(const InsertPointGuard &) = delete;
401 InsertPointGuard &operator=(const InsertPointGuard &) = delete;
402
403 ~InsertPointGuard() { Builder.restoreIP(IP: VPInsertPoint(Block, Point)); }
404 };
405};
406
407/// TODO: The following VectorizationFactor was pulled out of
408/// LoopVectorizationCostModel class. LV also deals with
409/// VectorizerParams::VectorizationFactor.
410/// We need to streamline them.
411
412/// Information about vectorization costs.
413struct VectorizationFactor {
414 /// Vector width with best cost.
415 ElementCount Width;
416
417 /// Cost of the loop with that width.
418 InstructionCost Cost;
419
420 /// Cost of the scalar loop.
421 InstructionCost ScalarCost;
422
423 /// The minimum trip count required to make vectorization profitable, e.g. due
424 /// to runtime checks.
425 ElementCount MinProfitableTripCount;
426
427 VectorizationFactor(ElementCount Width, InstructionCost Cost,
428 InstructionCost ScalarCost)
429 : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
430
431 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
432 static VectorizationFactor Disabled() {
433 return {ElementCount::getFixed(MinVal: 1), 0, 0};
434 }
435
436 bool operator==(const VectorizationFactor &rhs) const {
437 return Width == rhs.Width && Cost == rhs.Cost;
438 }
439
440 bool operator!=(const VectorizationFactor &rhs) const {
441 return !(*this == rhs);
442 }
443};
444
445/// A class that represents two vectorization factors (initialized with 0 by
446/// default). One for fixed-width vectorization and one for scalable
447/// vectorization. This can be used by the vectorizer to choose from a range of
448/// fixed and/or scalable VFs in order to find the most cost-effective VF to
449/// vectorize with.
450struct FixedScalableVFPair {
451 ElementCount FixedVF;
452 ElementCount ScalableVF;
453
454 FixedScalableVFPair()
455 : FixedVF(ElementCount::getFixed(MinVal: 0)),
456 ScalableVF(ElementCount::getScalable(MinVal: 0)) {}
457 FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
458 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
459 }
460 FixedScalableVFPair(const ElementCount &FixedVF,
461 const ElementCount &ScalableVF)
462 : FixedVF(FixedVF), ScalableVF(ScalableVF) {
463 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
464 "Invalid scalable properties");
465 }
466
467 static FixedScalableVFPair getNone() { return FixedScalableVFPair(); }
468
469 /// \return true if either fixed- or scalable VF is non-zero.
470 explicit operator bool() const { return FixedVF || ScalableVF; }
471
472 /// \return true if either fixed- or scalable VF is a valid vector VF.
473 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
474};
475
476/// Planner drives the vectorization process after having passed
477/// Legality checks.
478class LoopVectorizationPlanner {
479 /// The loop that we evaluate.
480 Loop *OrigLoop;
481
482 /// Loop Info analysis.
483 LoopInfo *LI;
484
485 /// The dominator tree.
486 DominatorTree *DT;
487
488 /// Target Library Info.
489 const TargetLibraryInfo *TLI;
490
491 /// Target Transform Info.
492 const TargetTransformInfo &TTI;
493
494 /// The legality analysis.
495 LoopVectorizationLegality *Legal;
496
497 /// The profitability analysis.
498 LoopVectorizationCostModel &CM;
499
500 /// The interleaved access analysis.
501 InterleavedAccessInfo &IAI;
502
503 PredicatedScalarEvolution &PSE;
504
505 const LoopVectorizeHints &Hints;
506
507 OptimizationRemarkEmitter *ORE;
508
509 SmallVector<VPlanPtr, 4> VPlans;
510
511 /// Profitable vector factors.
512 SmallVector<VectorizationFactor, 8> ProfitableVFs;
513
514 /// A builder used to construct the current plan.
515 VPBuilder Builder;
516
517 /// Computes the cost of \p Plan for vectorization factor \p VF.
518 ///
519 /// The current implementation requires access to the
520 /// LoopVectorizationLegality to handle inductions and reductions, which is
521 /// why it is kept separate from the VPlan-only cost infrastructure.
522 ///
523 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
524 /// been retired.
525 InstructionCost cost(VPlan &Plan, ElementCount VF) const;
526
527 /// Precompute costs for certain instructions using the legacy cost model. The
528 /// function is used to bring up the VPlan-based cost model to initially avoid
529 /// taking different decisions due to inaccuracies in the legacy cost model.
530 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
531 VPCostContext &CostCtx) const;
532
533public:
534 LoopVectorizationPlanner(
535 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
536 const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal,
537 LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI,
538 PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints,
539 OptimizationRemarkEmitter *ORE)
540 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
541 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
542
543 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
544 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
545 /// interleaving should be avoided up-front, no plans are generated.
546 void plan(ElementCount UserVF, unsigned UserIC);
547
548 /// Use the VPlan-native path to plan how to best vectorize, return the best
549 /// VF and its cost.
550 VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
551
552 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
553 /// for each VF.
554 VPlan &getPlanFor(ElementCount VF) const;
555
556 /// Compute and return the most profitable vectorization factor. Also collect
557 /// all profitable VFs in ProfitableVFs.
558 VectorizationFactor computeBestVF();
559
560 /// \return The desired interleave count.
561 /// If interleave count has been specified by metadata it will be returned.
562 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
563 /// are the selected vectorization factor and the cost of the selected VF.
564 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
565 InstructionCost LoopCost);
566
567 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
568 /// according to the best selected \p VF and \p UF.
569 ///
570 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
571 /// epilogue vector loop. It should be removed once the re-use issue has been
572 /// fixed.
573 ///
574 /// Returns a mapping of SCEVs to their expanded IR values.
575 /// Note that this is a temporary workaround needed due to the current
576 /// epilogue handling.
577 DenseMap<const SCEV *, Value *> executePlan(ElementCount VF, unsigned UF,
578 VPlan &BestPlan,
579 InnerLoopVectorizer &LB,
580 DominatorTree *DT,
581 bool VectorizingEpilogue);
582
583#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
584 void printPlans(raw_ostream &O);
585#endif
586
587 /// Look through the existing plans and return true if we have one with
588 /// vectorization factor \p VF.
589 bool hasPlanWithVF(ElementCount VF) const {
590 return any_of(Range: VPlans,
591 P: [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
592 }
593
594 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
595 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
596 /// returned value holds for the entire \p Range.
597 static bool
598 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
599 VFRange &Range);
600
601 /// \return The most profitable vectorization factor and the cost of that VF
602 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
603 /// epilogue vectorization is not supported for the loop.
604 VectorizationFactor
605 selectEpilogueVectorizationFactor(const ElementCount MainLoopVF, unsigned IC);
606
607 /// Emit remarks for recipes with invalid costs in the available VPlans.
608 void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
609
610 /// Create a check to \p Plan to see if the vector loop should be executed
611 /// based on its trip count.
612 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
613 ElementCount MinProfitableTripCount) const;
614
615 /// Update loop metadata and profile info for both the scalar remainder loop
616 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
617 /// loop on the vector loop and replaces vectorizer-specific metadata. The
618 /// loop ID of the original loop \p OrigLoopID must be passed, together with
619 /// the average trip count and invocation weight of the original loop (\p
620 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
621 /// cannot be retrieved after the plan has been executed, as the original loop
622 /// may have been removed.
623 void updateLoopMetadataAndProfileInfo(
624 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
625 bool VectorizingEpilogue, MDNode *OrigLoopID,
626 std::optional<unsigned> OrigAverageTripCount,
627 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
628 bool DisableRuntimeUnroll);
629
630protected:
631 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
632 /// according to the information gathered by Legal when it checked if it is
633 /// legal to vectorize the loop.
634 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
635
636private:
637 /// Build a VPlan according to the information gathered by Legal. \return a
638 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
639 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
640 /// the input range, set the largest included VF to the maximum VF for which
641 /// no plan could be built.
642 VPlanPtr tryToBuildVPlan(VFRange &Range);
643
644 /// Build a VPlan using VPRecipes according to the information gather by
645 /// Legal. This method is only used for the legacy inner loop vectorizer.
646 /// \p Range's largest included VF is restricted to the maximum VF the
647 /// returned VPlan is valid for. If no VPlan can be built for the input range,
648 /// set the largest included VF to the maximum VF for which no plan could be
649 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
650 /// is a plain CFG VPlan wrapping the original scalar loop.
651 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
652 LoopVersioning *LVer);
653
654 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
655 /// according to the information gathered by Legal when it checked if it is
656 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
657 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
658
659 /// Add recipes to compute the final reduction result (ComputeAnyOfResult,
660 /// ComputeReductionResult depending on the reduction) in
661 /// the middle block. Selects are introduced for reductions between the phi
662 /// and users outside the vector region when folding the tail.
663 void addReductionResultComputation(VPlanPtr &Plan,
664 VPRecipeBuilder &RecipeBuilder,
665 ElementCount MinVF);
666
667 /// Attach the runtime checks of \p RTChecks to \p Plan.
668 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
669 bool HasBranchWeights) const;
670
671#ifndef NDEBUG
672 /// \return The most profitable vectorization factor for the available VPlans
673 /// and the cost of that VF.
674 /// This is now only used to verify the decisions by the new VPlan-based
675 /// cost-model and will be retired once the VPlan-based cost-model is
676 /// stabilized.
677 VectorizationFactor selectVectorizationFactor();
678#endif
679
680 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
681 /// that of B.
682 bool isMoreProfitable(const VectorizationFactor &A,
683 const VectorizationFactor &B, bool HasTail,
684 bool IsEpilogue = false) const;
685
686 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
687 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
688 bool isMoreProfitable(const VectorizationFactor &A,
689 const VectorizationFactor &B,
690 const unsigned MaxTripCount, bool HasTail,
691 bool IsEpilogue = false) const;
692
693 /// Determines if we have the infrastructure to vectorize the loop and its
694 /// epilogue, assuming the main loop is vectorized by \p VF.
695 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
696};
697
698} // namespace llvm
699
700#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
701