//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
struct VPTransformState;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;

struct VPCostContext;

namespace Intrinsic {
typedef unsigned ID;
}

using VPlanPtr = std::unique_ptr<VPlan>;

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos != Predecessors.end() && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos != Successors.end() && "Successor does not exist");
    Successors.erase(Pos);
  }

  /// This function replaces one predecessor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Predecessors, Old);
    assert(I != Predecessors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced predecessor must have the same parent");
    *I = New;
  }

  /// This function replaces one successor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Successors, Old);
    assert(I != Successors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced successor must have the same parent");
    *I = New;
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }
  iterator_range<VPBlockBase **> predecessors() { return Predecessors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
  /// This VPBlockBase must have no successors. This VPBlockBase is not added
  /// as predecessor of any VPBasicBlock in \p NewSuccs.
  void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
    assert(Successors.empty() && "Block successors already set.");
    for (auto *Succ : NewSuccs)
      appendSuccessor(Succ);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// Swap predecessors of the block. The block must have exactly 2
  /// predecessors.
  void swapPredecessors() {
    assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
    std::swap(Predecessors[0], Predecessors[1]);
  }

  /// Swap successors of the block. The block must have exactly 2 successors.
  // TODO: This should be part of introducing conditional branch recipes rather
  // than being independent.
  void swapSuccessors() {
    assert(Successors.size() == 2 && "must have 2 successors to swap");
    std::swap(Successors[0], Successors[1]);
  }

  /// Returns the index for \p Pred in the block's predecessors list.
  unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
    assert(count(Predecessors, Pred) == 1 &&
           "must have Pred exactly once in Predecessors");
    return std::distance(Predecessors.begin(), find(Predecessors, Pred));
  }

  /// Returns the index for \p Succ in the block's successors list.
  unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
    assert(count(Successors, Succ) == 1 &&
           "must have Succ exactly once in Successors");
    return std::distance(Successors.begin(), find(Successors, Succ));
  }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Return the cost of the block.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif

  /// Clone the current block and its recipes without updating the operands of
  /// the cloned recipes, including all blocks in the single-entry single-exit
  /// region for VPRegionBlocks.
  virtual VPBlockBase *clone() = 0;
};
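
// Illustrative sketch (not part of the API): wiring up two blocks when
// building a plan by hand. The setters above only update one side of an
// edge, so both directions must be set explicitly; `A` and `B` stand for
// hypothetical VPBasicBlocks with the same parent region:
//
//   A->setOneSuccessor(B);
//   B->setPredecessors({A});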

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands), DL(DL) {}

  virtual ~VPRecipeBase() = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes
  /// it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) { return true; }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const;

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

  /// Return true if the recipe is a scalar cast.
  bool isScalarCast() const;

  /// Set the recipe's debug location to \p NewDL.
  void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }

protected:
  /// Compute the cost of this recipe either using a recipe's specialized
  /// implementation or using the legacy cost model and the underlying
  /// instructions.
  virtual InstructionCost computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const;
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
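
// For illustration: a recipe class instantiates this macro with its own VPDef
// ID, e.g. VP_CLASSOF_IMPL(VPDef::VPWidenSC) inside VPWidenRecipe below. That
// single line is what makes isa<VPWidenRecipe>(R), cast and dyn_cast work
// uniformly on VPDef, VPValue, VPUser, VPRecipeBase and VPSingleDefRecipe
// pointers.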

/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    Value *UV, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPExpressionSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPVectorEndPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenIntrinsicSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPPartialReductionSC:
      return true;
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPIRInstructionSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
    case VPRecipeBase::VPHistogramSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPSingleDefRecipe to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif
};

/// Class to record and manage LLVM IR flags.
class VPIRFlags {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  struct NonNegFlagsTy {
    char NonNeg : 1;
    NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPNoWrapFlags GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

public:
  VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}

  VPIRFlags(Instruction &I) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags = GEP->getNoWrapFlags();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  VPIRFlags(CmpInst::Predicate Pred)
      : OpType(OperationType::Cmp), CmpPredicate(Pred) {}

  VPIRFlags(WrapFlagsTy WrapFlags)
      : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}

  VPIRFlags(DisjointFlagsTy DisjointFlags)
      : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}

  VPIRFlags(NonNegFlagsTy NonNegFlags)
      : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}

  VPIRFlags(GEPNoWrapFlags GEPFlags)
      : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}

public:
  void transferFlags(VPIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags = GEPNoWrapFlags::none();
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Apply the IR flags to \p I.
  void applyFlags(Instruction &I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I.setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I.setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
      break;
    case OperationType::FPMathOp:
      I.setHasAllowReassoc(FMFs.AllowReassoc);
      I.setHasNoNaNs(FMFs.NoNaNs);
      I.setHasNoInfs(FMFs.NoInfs);
      I.setHasNoSignedZeros(FMFs.NoSignedZeros);
      I.setHasAllowReciprocal(FMFs.AllowReciprocal);
      I.setHasAllowContract(FMFs.AllowContract);
      I.setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I.setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  void setPredicate(CmpInst::Predicate Pred) {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    CmpPredicate = Pred;
  }

  GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  /// Returns true if the recipe has a non-negative flag.
  bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }

  bool isNonNeg() const {
    assert(OpType == OperationType::NonNegOp &&
           "recipe doesn't have a NNEG flag");
    return NonNegFlags.NonNeg;
  }

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG)
  /// Returns true if the set flags are valid for \p Opcode.
  bool flagsValidForOpcode(unsigned Opcode) const;
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
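
// Illustrative sketch: flags are typically captured from an existing
// instruction and re-applied to newly generated IR. Assuming `OldI` is an
// `add nuw` Instruction and `NewI` its freshly created counterpart:
//
//   VPIRFlags Flags(*OldI);            // records NUW/NSW
//   Flags.dropPoisonGeneratingFlags(); // clears them if unsafe to keep
//   Flags.applyFlags(*NewI);
//
// A flag set can also be built directly, without an instruction:
//
//   VPIRFlags NUWOnly(
//       VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false));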

/// A pure-virtual common base class for recipes defining a single VPValue and
/// using IR flags.
struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}

  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}

  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      const VPIRFlags &Flags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  static inline bool classof(const VPValue *V) {
    auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
    return R && classof(R);
  }

  void execute(VPTransformState &State) override = 0;
};

/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(VPUser &U) const;
};

/// Helper to manage IR metadata for recipes. It filters out metadata that
/// cannot be propagated.
class VPIRMetadata {
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;

public:
  VPIRMetadata() {}

  /// Adds metadata that can be preserved from the original instruction
  /// \p I.
  VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }

  /// Adds metadata that can be preserved from the original instruction
  /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
  VPIRMetadata(Instruction &I, LoopVersioning *LVer);

  /// Copy constructor for cloning.
  VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}

  /// Add all metadata to \p I.
  void applyMetadata(Instruction &I) const;

  /// Add metadata with kind \p Kind and \p Node.
  void addMetadata(unsigned Kind, MDNode *Node) {
    Metadata.emplace_back(Kind, Node);
  }
};
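
// Illustrative sketch: recipes mixing in VPIRMetadata capture the
// propagatable metadata of their source instruction on construction and
// re-attach it to the instructions they generate. With `OrigLoad` the
// original LoadInst and `WideLoad` a hypothetical widened load:
//
//   VPIRMetadata MD(*OrigLoad); // keeps e.g. !tbaa, filters out the rest
//   MD.applyMetadata(*WideLoad);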

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags,
                      public VPIRMetadata,
                      public VPUnrollPartAccessor<1> {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    ExplicitVectorLength,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    Broadcast,
    /// Given operands of (the same) struct type, creates a struct of fixed-
    /// width vectors each containing a struct field of all operands. The
    /// number of operands matches the element count of every vector.
    BuildStructVector,
    /// Creates a fixed-width vector containing all operands. The number of
    /// operands matches the vector element count.
    BuildVector,
    /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
    /// where one of (x,y) is loop invariant, and both x and y are integer type.
    ComputeAnyOfResult,
    ComputeFindIVResult,
    ComputeReductionResult,
    // Extracts the last lane from its operand if it is a vector, or the last
    // part if scalar. In the latter case, the recipe will be removed during
    // unrolling.
    ExtractLastElement,
    // Extracts the second-to-last lane from its operand or the second-to-last
    // part if it is scalar. In the latter case, the recipe will be removed
    // during unrolling.
    ExtractPenultimateElement,
    LogicalAnd, // Non-poison propagating logical And.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only
    // or for all lanes, depending on its uses).
    PtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its
    // (boolean) vector operands is true. It produces the reduced value across
    // all unrolled iterations. Unrolling will add all copies of its original
    // operand as additional operands.
    AnyOf,
    // Calculates the first active lane index of the vector predicate operands.
    // It produces the lane index across all unrolled iterations. Unrolling
    // will add all copies of its original operand as additional operands.
    FirstActiveLane,

    // The opcodes below are used for VPInstructionWithType.
    //
    /// Scale the first operand (vector step) by the second operand
    /// (scalar-step). Casts both operands to the result type if needed.
    WideIVStep,
    /// Start vector for reductions with 3 operands: the original start value,
    /// the identity value for the reduction and an integer indicating the
    /// scaling factor.
    ReductionStartVector,
    // Creates a step vector starting from 0 to VF with a step of 1.
    StepVector,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

  /// Utility methods serving execute(): generates a scalar single instance of
  /// the modeled instruction for a given lane. \returns the scalar generated
  /// value for lane \p Lane.
  Value *generatePerLane(VPTransformState &State, const VPLane &Lane);

#if !defined(NDEBUG)
  /// Return the number of operands determined by the opcode of the
  /// VPInstruction. Returns -1u if the number of operands cannot be determined
  /// directly by the opcode.
  static unsigned getNumOperandsForOpcode(unsigned Opcode);
#endif

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {},
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags, DebugLoc DL = {},
                const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  VPInstruction *clone() override {
    SmallVector<VPValue *, 2> Operands(operands());
    auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
    if (getUnderlyingValue())
      New->setUnderlyingValue(getUnderlyingInstr());
    return New;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the underlying opcode may read from or write to memory.
  bool opcodeMayReadOrWriteFromMemory() const;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }
};
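
// Illustrative sketch: building a VPlan-level instruction with one of the
// extended opcodes above. `Cond` stands for an existing VPValue and `DL` for
// a DebugLoc:
//
//   auto *NotCond =
//       new VPInstruction(VPInstruction::Not, {Cond}, DL, "not.cond");
//
// The new recipe still has to be inserted into a VPBasicBlock (e.g. via
// insertBefore) so that it is executed with the rest of the plan.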

/// A specialization of VPInstruction augmenting it with a dedicated result
/// type, to be used when the opcode and operands of the VPInstruction don't
/// directly determine the result type. Note that there is no separate VPDef ID
/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
/// distinguished purely by the opcode.
class VPInstructionWithType : public VPInstruction {
  /// Scalar result type produced by the recipe.
  Type *ResultTy;

public:
  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
                        Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
                        const Twine &Name = "")
      : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}

  static inline bool classof(const VPRecipeBase *R) {
    // VPInstructionWithType are VPInstructions with specific opcodes requiring
    // type information.
    if (R->isScalarCast())
      return true;
    auto *VPI = dyn_cast<VPInstruction>(R);
    if (!VPI)
      return false;
    switch (VPI->getOpcode()) {
    case VPInstruction::WideIVStep:
    case VPInstruction::StepVector:
      return true;
    default:
      return false;
    }
  }

  static inline bool classof(const VPUser *R) {
    return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
  }

  VPInstruction *clone() override {
    SmallVector<VPValue *, 2> Operands(operands());
    auto *New =
        new VPInstructionWithType(getOpcode(), Operands, getResultType(),
                                  *this, getDebugLoc(), getName());
    New->setUnderlyingValue(getUnderlyingValue());
    return New;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  Type *getResultType() const { return ResultTy; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// Helper type to provide functions to access incoming values and blocks for
/// phi-like recipes.
class VPPhiAccessors {
protected:
  /// Return a VPRecipeBase* to the current object.
  virtual const VPRecipeBase *getAsRecipe() const = 0;

public:
  virtual ~VPPhiAccessors() = default;

  /// Returns the incoming VPValue with index \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return getAsRecipe()->getOperand(Idx);
  }

  /// Returns the incoming block with index \p Idx.
  const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

  /// Returns the number of incoming values, also number of incoming blocks.
  virtual unsigned getNumIncoming() const {
    return getAsRecipe()->getNumOperands();
  }

  /// Removes the incoming value for \p IncomingBlock, which must be a
  /// predecessor.
  void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the phi operands.
  void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

struct VPPhi : public VPInstruction, public VPPhiAccessors {
  VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
      : VPInstruction(Instruction::PHI, Operands, DL, Name) {}

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPInstruction>(U);
    return R && R->getOpcode() == Instruction::PHI;
  }

  VPPhi *clone() override {
    return new VPPhi(operands(), getDebugLoc(), getName());
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

protected:
  const VPRecipeBase *getAsRecipe() const override { return this; }
};

/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {
  Instruction &I;

protected:
  /// VPIRInstruction::create() should be used to create VPIRInstructions, as
  /// subclasses may need to be created, e.g. VPIRPhi.
  VPIRInstruction(Instruction &I)
      : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}

public:
  ~VPIRInstruction() override = default;

  /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
  /// VPIRInstruction.
  static VPIRInstruction *create(Instruction &I);

  VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)

  VPIRInstruction *clone() override {
    auto *R = create(I);
    for (auto *Op : operands())
      R->addOperand(Op);
    return R;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPIRInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Instruction &getInstruction() const { return I; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Update the recipe's first operand to the last lane of the operand using
  /// \p Builder. Must only be used for VPIRInstructions with at least one
  /// operand wrapping a PHINode.
  void extractLastLaneOfFirstOperand(VPBuilder &Builder);
};
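
// Illustrative sketch: create() hides the subclass choice, so callers wrap
// scalar-loop instructions uniformly. With `ExitI` a hypothetical instruction
// in an exit block:
//
//   VPIRInstruction *Wrapper = VPIRInstruction::create(*ExitI);
//   if (auto *P = dyn_cast<VPIRPhi>(Wrapper)) // VPIRPhi iff ExitI is a PHI
//     ; // incoming values are then accessible via VPPhiAccessors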

/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
/// of cast/dyn_cast/isa and an execute() implementation. A single VPValue
/// operand is allowed, and it is used to add a new incoming value for the
/// single predecessor VPBB.
struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors {
  VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}

  static inline bool classof(const VPRecipeBase *U) {
    auto *R = dyn_cast<VPIRInstruction>(U);
    return R && isa<PHINode>(R->getInstruction());
  }

  PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

protected:
  const VPRecipeBase *getAsRecipe() const override { return this; }
};

/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  unsigned Opcode;

public:
  VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
        Opcode(Opcode) {}

  VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
        Opcode(I.getOpcode()) {}

  ~VPWidenRecipe() override = default;

  VPWidenRecipe *clone() override {
    auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
    R->transferFlags(*this);
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce a widened instruction using the opcode and operands of the
  /// recipe, processing State.VF elements.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI),
        Opcode(Opcode), ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    const VPIRFlags &Flags = {}, DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
        VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) {
    assert(flagsValidForOpcode(Opcode) &&
           "Set flags not supported for the provided opcode");
  }

  ~VPWidenCastRecipe() override = default;

  VPWidenCastRecipe *clone() override {
    if (auto *UV = getUnderlyingValue())
      return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
                                   *cast<CastInst>(UV));

    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCastRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};
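
// Illustrative sketch: a standalone widened cast can be created without an
// underlying IR cast, e.g. zero-extending an existing VPValue `V` to a
// hypothetical type `Int64Ty`:
//
//   auto *ZExt = new VPWidenCastRecipe(Instruction::ZExt, V, Int64Ty);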

/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// ID of the vector intrinsic to widen.
  Intrinsic::ID VectorIntrinsicID;

  /// Scalar return type of the intrinsic.
  Type *ResultTy;

  /// True if the intrinsic may read from memory.
  bool MayReadFromMemory;

  /// True if the intrinsic may write to memory.
  bool MayWriteToMemory;

  /// True if the intrinsic may have side-effects.
  bool MayHaveSideEffects;

public:
  VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
        VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
        MayReadFromMemory(CI.mayReadFromMemory()),
        MayWriteToMemory(CI.mayWriteToMemory()),
        MayHaveSideEffects(CI.mayHaveSideEffects()) {}

  VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
        VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
    LLVMContext &Ctx = Ty->getContext();
    AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
    MemoryEffects ME = Attrs.getMemoryEffects();
    MayReadFromMemory = !ME.onlyWritesMemory();
    MayWriteToMemory = !ME.onlyReadsMemory();
    MayHaveSideEffects = MayWriteToMemory ||
                         !Attrs.hasAttribute(Attribute::NoUnwind) ||
                         !Attrs.hasAttribute(Attribute::WillReturn);
  }

  ~VPWidenIntrinsicRecipe() override = default;

  VPWidenIntrinsicRecipe *clone() override {
    if (Value *CI = getUnderlyingValue())
      return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
                                        operands(), ResultTy, getDebugLoc());
    return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
                                      getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)

  /// Produce a widened version of the vector intrinsic.
  void execute(VPTransformState &State) override;

  /// Return the cost of this vector intrinsic.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Return the ID of the intrinsic.
  Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }

  /// Return the scalar return type of the intrinsic.
  Type *getResultType() const { return ResultTy; }

  /// Return the name of the intrinsic as a string.
  StringRef getIntrinsicName() const;

  /// Returns true if the intrinsic may read from memory.
  bool mayReadFromMemory() const { return MayReadFromMemory; }

  /// Returns true if the intrinsic may write to memory.
  bool mayWriteToMemory() const { return MayWriteToMemory; }

  /// Returns true if the intrinsic may have side-effects.
  bool mayHaveSideEffects() const { return MayHaveSideEffects; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool onlyFirstLaneUsed(const VPValue *Op) const override;
};

/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
  /// between a given VF and the chosen vectorized variant, so there will be a
  /// different VPlan for each VF with a valid variant.
  Function *Variant;

public:
  VPWidenCallRecipe(Value *UV, Function *Variant,
                    ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
                            *cast<Instruction>(UV)),
        VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
    assert(
        isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
        "last operand must be the called function");
  }

  ~VPWidenCallRecipe() override = default;

  VPWidenCallRecipe *clone() override {
    return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
                                 getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCallRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Function *getCalledScalarFunction() const {
    return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
  }

  operand_range args() { return make_range(op_begin(), std::prev(op_end())); }
  const_operand_range args() const {
    return make_range(op_begin(), std::prev(op_end()));
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
1555
1556/// A recipe representing a sequence of load -> update -> store as part of
1557/// a histogram operation. This means there may be aliasing between vector
1558/// lanes, which is handled by the llvm.experimental.vector.histogram family
1559/// of intrinsics. The only update operations currently supported are
1560/// 'add' and 'sub' where the other term is loop-invariant.
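/// For example, this models a scalar loop of the form (a sketch):
///   for (I = 0; I < N; I++)
///     Buckets[Indices[I]] += 1;
/// where several lanes of Indices may refer to the same bucket within a
/// single vector iteration.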
1561class VPHistogramRecipe : public VPRecipeBase {
1562 /// Opcode of the update operation, currently either add or sub.
1563 unsigned Opcode;
1564
1565public:
1566 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1567 DebugLoc DL = {})
1568 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1569
1570 ~VPHistogramRecipe() override = default;
1571
1572 VPHistogramRecipe *clone() override {
1573 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1574 }
1575
1576 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1577
1578 /// Produce a vectorized histogram operation.
1579 void execute(VPTransformState &State) override;
1580
1581 /// Return the cost of this VPHistogramRecipe.
1582 InstructionCost computeCost(ElementCount VF,
1583 VPCostContext &Ctx) const override;
1584
1585 unsigned getOpcode() const { return Opcode; }
1586
1587 /// Return the mask operand if one was provided, or a null pointer if all
1588 /// lanes should be executed unconditionally.
1589 VPValue *getMask() const {
1590 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
1591 }
1592
1593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
1595 void print(raw_ostream &O, const Twine &Indent,
1596 VPSlotTracker &SlotTracker) const override;
1597#endif
1598};
1599
1600/// A recipe for widening select instructions.
1601struct VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1602 VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands)
1603 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
1604 VPIRMetadata(I) {}
1605
1606 ~VPWidenSelectRecipe() override = default;
1607
1608 VPWidenSelectRecipe *clone() override {
1609 return new VPWidenSelectRecipe(*cast<SelectInst>(Val: getUnderlyingInstr()),
1610 operands());
1611 }
1612
1613 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1614
1615 /// Produce a widened version of the select instruction.
1616 void execute(VPTransformState &State) override;
1617
1618 /// Return the cost of this VPWidenSelectRecipe.
1619 InstructionCost computeCost(ElementCount VF,
1620 VPCostContext &Ctx) const override;
1621
1622#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1623 /// Print the recipe.
1624 void print(raw_ostream &O, const Twine &Indent,
1625 VPSlotTracker &SlotTracker) const override;
1626#endif
1627
1628 VPValue *getCond() const {
1629 return getOperand(N: 0);
1630 }
1631
1632 bool isInvariantCond() const {
1633 return getCond()->isDefinedOutsideLoopRegions();
1634 }
1635
1636 /// Returns true if the recipe only uses the first lane of operand \p Op.
1637 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1638 assert(is_contained(operands(), Op) &&
1639 "Op must be an operand of the recipe");
1640 return Op == getCond() && isInvariantCond();
1641 }
1642};
1643
1644/// A recipe for handling GEP instructions.
1645class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
1646 bool isPointerLoopInvariant() const {
1647 return getOperand(N: 0)->isDefinedOutsideLoopRegions();
1648 }
1649
1650 bool isIndexLoopInvariant(unsigned I) const {
1651 return getOperand(N: I + 1)->isDefinedOutsideLoopRegions();
1652 }
1653
1654 bool areAllOperandsInvariant() const {
1655 return all_of(Range: operands(), P: [](VPValue *Op) {
1656 return Op->isDefinedOutsideLoopRegions();
1657 });
1658 }
1659
1660public:
1661 VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
1662 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {
1663 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1664 (void)Metadata;
1665 getMetadataToPropagate(Inst: GEP, Metadata);
1666 assert(Metadata.empty() && "unexpected metadata on GEP");
1667 }
1668
1669 ~VPWidenGEPRecipe() override = default;
1670
1671 VPWidenGEPRecipe *clone() override {
1672 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()),
1673 operands());
1674 }
1675
1676 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1677
1678 /// Generate the gep nodes.
1679 void execute(VPTransformState &State) override;
1680
1681 /// Return the cost of this VPWidenGEPRecipe.
1682 InstructionCost computeCost(ElementCount VF,
1683 VPCostContext &Ctx) const override {
1684 // TODO: Compute accurate cost after retiring the legacy cost model.
1685 return 0;
1686 }
1687
1688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1689 /// Print the recipe.
1690 void print(raw_ostream &O, const Twine &Indent,
1691 VPSlotTracker &SlotTracker) const override;
1692#endif
1693
1694 /// Returns true if the recipe only uses the first lane of operand \p Op.
1695 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1696 assert(is_contained(operands(), Op) &&
1697 "Op must be an operand of the recipe");
1698 if (Op == getOperand(N: 0))
1699 return isPointerLoopInvariant();
1700 else
1701 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1702 }
1703};
1704
/// A recipe to compute a pointer to the last element of each part of a widened
/// memory access of IndexedTy. Used for VPWidenMemoryRecipes or
/// VPInterleaveRecipes that are reversed.
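/// For example, for a reversed access with a Stride of -1, the recipe
/// produces, per part, the address of the part's last (lowest-addressed)
/// element, so that the wide load or store can start there and be combined
/// with a reverse shuffle (a sketch of the intent, not the exact GEP
/// sequence).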
1708class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
1709 public VPUnrollPartAccessor<2> {
1710 Type *IndexedTy;
1711
1712 /// The constant stride of the pointer computed by this recipe, expressed in
1713 /// units of IndexedTy.
1714 int64_t Stride;
1715
1716public:
1717 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
1718 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1719 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1720 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1721 IndexedTy(IndexedTy), Stride(Stride) {
1722 assert(Stride < 0 && "Stride must be negative");
1723 }
1724
1725 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1726
1727 VPValue *getVFValue() { return getOperand(N: 1); }
1728 const VPValue *getVFValue() const { return getOperand(N: 1); }
1729
1730 void execute(VPTransformState &State) override;
1731
1732 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1733 assert(is_contained(operands(), Op) &&
1734 "Op must be an operand of the recipe");
1735 return true;
1736 }
1737
  /// Return the cost of this VPVectorEndPointerRecipe.
1739 InstructionCost computeCost(ElementCount VF,
1740 VPCostContext &Ctx) const override {
1741 // TODO: Compute accurate cost after retiring the legacy cost model.
1742 return 0;
1743 }
1744
1745 /// Returns true if the recipe only uses the first part of operand \p Op.
1746 bool onlyFirstPartUsed(const VPValue *Op) const override {
1747 assert(is_contained(operands(), Op) &&
1748 "Op must be an operand of the recipe");
1749 assert(getNumOperands() <= 2 && "must have at most two operands");
1750 return true;
1751 }
1752
1753 VPVectorEndPointerRecipe *clone() override {
1754 return new VPVectorEndPointerRecipe(getOperand(N: 0), getVFValue(), IndexedTy,
1755 Stride, getGEPNoWrapFlags(),
1756 getDebugLoc());
1757 }
1758
1759#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1760 /// Print the recipe.
1761 void print(raw_ostream &O, const Twine &Indent,
1762 VPSlotTracker &SlotTracker) const override;
1763#endif
1764};
1765
/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1767class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
1768 public VPUnrollPartAccessor<1> {
1769 Type *IndexedTy;
1770
1771public:
1772 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags,
1773 DebugLoc DL)
1774 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1775 GEPFlags, DL),
1776 IndexedTy(IndexedTy) {}
1777
1778 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1779
1780 void execute(VPTransformState &State) override;
1781
1782 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1783 assert(is_contained(operands(), Op) &&
1784 "Op must be an operand of the recipe");
1785 return true;
1786 }
1787
1788 /// Returns true if the recipe only uses the first part of operand \p Op.
1789 bool onlyFirstPartUsed(const VPValue *Op) const override {
1790 assert(is_contained(operands(), Op) &&
1791 "Op must be an operand of the recipe");
1792 assert(getNumOperands() <= 2 && "must have at most two operands");
1793 return true;
1794 }
1795
1796 VPVectorPointerRecipe *clone() override {
1797 return new VPVectorPointerRecipe(getOperand(N: 0), IndexedTy,
1798 getGEPNoWrapFlags(), getDebugLoc());
1799 }
1800
  /// Return the cost of this VPVectorPointerRecipe.
1802 InstructionCost computeCost(ElementCount VF,
1803 VPCostContext &Ctx) const override {
1804 // TODO: Compute accurate cost after retiring the legacy cost model.
1805 return 0;
1806 }
1807
1808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1809 /// Print the recipe.
1810 void print(raw_ostream &O, const Twine &Indent,
1811 VPSlotTracker &SlotTracker) const override;
1812#endif
1813};
1814
1815/// A pure virtual base class for all recipes modeling header phis, including
1816/// phis for first order recurrences, pointer inductions and reductions. The
1817/// start value is the first operand of the recipe and the incoming value from
1818/// the backedge is the second operand.
1819///
1820/// Inductions are modeled using the following sub-classes:
1821/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1822/// starting at a specified value (zero for the main vector loop, the resume
1823/// value for the epilogue vector loop) and stepping by 1. The induction
1824/// controls exiting of the vector loop by comparing against the vector trip
1825/// count. Produces a single scalar PHI for the induction value per
1826/// iteration.
1827/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1828/// floating point inductions with arbitrary start and step values. Produces
1829/// a vector PHI per-part.
1830/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1831/// value of an IV with different start and step values. Produces a single
///   scalar value per iteration.
1833/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1834/// canonical or derived induction.
1835/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1836/// pointer induction. Produces either a vector PHI per-part or scalar values
1837/// per-lane based on the canonical induction.
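/// For example, for a scalar loop whose induction variable I both controls
/// the loop and addresses A[I], the loop control is modeled by a
/// VPCanonicalIVPHIRecipe, wide vector values of I by a
/// VPWidenIntOrFpInductionRecipe, and per-lane scalar values of I (e.g. for
/// scalarized address computations) by a VPScalarIVStepsRecipe.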
1838class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
1839protected:
1840 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1841 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
1842 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
1843 UnderlyingInstr, DL) {}
1844
1845 const VPRecipeBase *getAsRecipe() const override { return this; }
1846
1847public:
1848 ~VPHeaderPHIRecipe() override = default;
1849
1850 /// Method to support type inquiry through isa, cast, and dyn_cast.
1851 static inline bool classof(const VPRecipeBase *B) {
1852 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1853 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1854 }
1855 static inline bool classof(const VPValue *V) {
1856 auto *B = V->getDefiningRecipe();
1857 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1858 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1859 }
1860
1861 /// Generate the phi nodes.
1862 void execute(VPTransformState &State) override = 0;
1863
1864 /// Return the cost of this header phi recipe.
1865 InstructionCost computeCost(ElementCount VF,
1866 VPCostContext &Ctx) const override;
1867
1868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1869 /// Print the recipe.
1870 void print(raw_ostream &O, const Twine &Indent,
1871 VPSlotTracker &SlotTracker) const override = 0;
1872#endif
1873
1874 /// Returns the start value of the phi, if one is set.
1875 VPValue *getStartValue() {
1876 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
1877 }
1878 VPValue *getStartValue() const {
1879 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
1880 }
1881
1882 /// Update the start value of the recipe.
1883 void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }
1884
1885 /// Returns the incoming value from the loop backedge.
1886 virtual VPValue *getBackedgeValue() {
1887 return getOperand(N: 1);
1888 }
1889
  /// Returns the recipe defining the backedge value. Such a defining recipe
  /// is guaranteed to exist.
1892 virtual VPRecipeBase &getBackedgeRecipe() {
1893 return *getBackedgeValue()->getDefiningRecipe();
1894 }
1895};
1896
1897/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
1898/// VPWidenPointerInductionRecipe), providing shared functionality, including
1899/// retrieving the step value, induction descriptor and original phi node.
1900class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
1901 const InductionDescriptor &IndDesc;
1902
1903public:
1904 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
1905 VPValue *Step, const InductionDescriptor &IndDesc,
1906 DebugLoc DL)
1907 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
1908 addOperand(Operand: Step);
1909 }
1910
1911 static inline bool classof(const VPRecipeBase *R) {
1912 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
1913 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
1914 }
1915
1916 static inline bool classof(const VPValue *V) {
1917 auto *R = V->getDefiningRecipe();
1918 return R && classof(R);
1919 }
1920
1921 static inline bool classof(const VPHeaderPHIRecipe *R) {
1922 return classof(R: static_cast<const VPRecipeBase *>(R));
1923 }
1924
1925 virtual void execute(VPTransformState &State) override = 0;
1926
1927 /// Returns the step value of the induction.
1928 VPValue *getStepValue() { return getOperand(N: 1); }
1929 const VPValue *getStepValue() const { return getOperand(N: 1); }
1930
1931 /// Update the step value of the recipe.
1932 void setStepValue(VPValue *V) { setOperand(I: 1, New: V); }
1933
  /// Returns the number of incoming values, which also equals the number of
  /// incoming blocks. Note that at the moment, VPWidenPointerInductionRecipe
  /// only has a single incoming value, its start value.
1937 unsigned getNumIncoming() const override { return 1; }
1938
1939 PHINode *getPHINode() const { return cast<PHINode>(Val: getUnderlyingValue()); }
1940
1941 /// Returns the induction descriptor for the recipe.
1942 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1943
1944 VPValue *getBackedgeValue() override {
1945 // TODO: All operands of base recipe must exist and be at same index in
1946 // derived recipe.
1947 llvm_unreachable(
1948 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1949 }
1950
1951 VPRecipeBase &getBackedgeRecipe() override {
1952 // TODO: All operands of base recipe must exist and be at same index in
1953 // derived recipe.
1954 llvm_unreachable(
1955 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1956 }
1957
1958 /// Returns true if the recipe only uses the first lane of operand \p Op.
1959 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1960 assert(is_contained(operands(), Op) &&
1961 "Op must be an operand of the recipe");
1962 // The recipe creates its own wide start value, so it only requests the
1963 // first lane of the operand.
1964 // TODO: Remove once creating the start value is modeled separately.
1965 return Op == getStartValue() || Op == getStepValue();
1966 }
1967};
1968
1969/// A recipe for handling phi nodes of integer and floating-point inductions,
1970/// producing their vector values. This is an abstract recipe and must be
1971/// converted to concrete recipes before executing.
1972class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
1973 TruncInst *Trunc;
1974
1975 // If this recipe is unrolled it will have 2 additional operands.
1976 bool isUnrolled() const { return getNumOperands() == 5; }
1977
1978public:
1979 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1980 VPValue *VF, const InductionDescriptor &IndDesc,
1981 DebugLoc DL)
1982 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
1983 Step, IndDesc, DL),
1984 Trunc(nullptr) {
1985 addOperand(Operand: VF);
1986 }
1987
1988 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1989 VPValue *VF, const InductionDescriptor &IndDesc,
1990 TruncInst *Trunc, DebugLoc DL)
1991 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
1992 Step, IndDesc, DL),
1993 Trunc(Trunc) {
1994 addOperand(Operand: VF);
1995 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1996 (void)Metadata;
1997 if (Trunc)
1998 getMetadataToPropagate(Inst: Trunc, Metadata);
1999 assert(Metadata.empty() && "unexpected metadata on Trunc");
2000 }
2001
2002 ~VPWidenIntOrFpInductionRecipe() override = default;
2003
2004 VPWidenIntOrFpInductionRecipe *clone() override {
2005 return new VPWidenIntOrFpInductionRecipe(
2006 getPHINode(), getStartValue(), getStepValue(), getVFValue(),
2007 getInductionDescriptor(), Trunc, getDebugLoc());
2008 }
2009
2010 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2011
2012 void execute(VPTransformState &State) override {
2013 llvm_unreachable("cannot execute this recipe, should be expanded via "
2014 "expandVPWidenIntOrFpInductionRecipe");
2015 }
2016
2017#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2018 /// Print the recipe.
2019 void print(raw_ostream &O, const Twine &Indent,
2020 VPSlotTracker &SlotTracker) const override;
2021#endif
2022
2023 VPValue *getVFValue() { return getOperand(N: 2); }
2024 const VPValue *getVFValue() const { return getOperand(N: 2); }
2025
2026 VPValue *getSplatVFValue() {
2027 // If the recipe has been unrolled return the VPValue for the induction
2028 // increment.
2029 return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr;
2030 }
2031
  /// Returns the number of incoming values, which also equals the number of
  /// incoming blocks. Note that at the moment, VPWidenIntOrFpInductionRecipes
  /// only have a single incoming value, their start value.
2035 unsigned getNumIncoming() const override { return 1; }
2036
  /// Returns the TruncInst of this recipe if it has one, or nullptr
  /// otherwise.
2039 TruncInst *getTruncInst() { return Trunc; }
2040 const TruncInst *getTruncInst() const { return Trunc; }
2041
2042 /// Returns true if the induction is canonical, i.e. starting at 0 and
2043 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2044 /// same type as the canonical induction.
2045 bool isCanonical() const;
2046
2047 /// Returns the scalar type of the induction.
2048 Type *getScalarType() const {
2049 return Trunc ? Trunc->getType()
2050 : getStartValue()->getLiveInIRValue()->getType();
2051 }
2052
2053 /// Returns the VPValue representing the value of this induction at
2054 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2055 /// take place.
2056 VPValue *getLastUnrolledPartOperand() {
2057 return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this;
2058 }
2059};
2060
2061class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
2062 public VPUnrollPartAccessor<4> {
2063 bool IsScalarAfterVectorization;
2064
2065public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value
  /// \p Start, step value \p Step and the number of unrolled elements
  /// \p NumUnrolledElems, typically VF*UF.
2069 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2070 VPValue *NumUnrolledElems,
2071 const InductionDescriptor &IndDesc,
2072 bool IsScalarAfterVectorization, DebugLoc DL)
2073 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2074 Step, IndDesc, DL),
2075 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2076 addOperand(Operand: NumUnrolledElems);
2077 }
2078
2079 ~VPWidenPointerInductionRecipe() override = default;
2080
2081 VPWidenPointerInductionRecipe *clone() override {
2082 return new VPWidenPointerInductionRecipe(
2083 cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1),
2084 getOperand(N: 2), getInductionDescriptor(), IsScalarAfterVectorization,
2085 getDebugLoc());
2086 }
2087
2088 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2089
2090 /// Generate vector values for the pointer induction.
2091 void execute(VPTransformState &State) override;
2092
2093 /// Returns true if only scalar values will be generated.
2094 bool onlyScalarsGenerated(bool IsScalable);
2095
2096 /// Returns the VPValue representing the value of this induction at
2097 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2098 /// take place.
2099 VPValue *getFirstUnrolledPartOperand() {
2100 return getUnrollPart(U&: *this) == 0 ? this : getOperand(N: 3);
2101 }
2102
2103#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2104 /// Print the recipe.
2105 void print(raw_ostream &O, const Twine &Indent,
2106 VPSlotTracker &SlotTracker) const override;
2107#endif
2108};
2109
2110/// A recipe for widened phis. Incoming values are operands of the recipe and
2111/// their operand index corresponds to the incoming predecessor block. If the
2112/// recipe is placed in an entry block to a (non-replicate) region, it must have
2113/// exactly 2 incoming values, the first from the predecessor of the region and
2114/// the second from the exiting block of the region.
2115class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
2116 /// Name to use for the generated IR instruction for the widened phi.
2117 std::string Name;
2118
2119protected:
2120 const VPRecipeBase *getAsRecipe() const override { return this; }
2121
2122public:
2123 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2124 /// debug location \p DL.
2125 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {},
2126 const Twine &Name = "")
2127 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL),
2128 Name(Name.str()) {
2129 if (Start)
2130 addOperand(Operand: Start);
2131 }
2132
2133 VPWidenPHIRecipe *clone() override {
2134 auto *C = new VPWidenPHIRecipe(cast<PHINode>(Val: getUnderlyingValue()),
2135 getOperand(N: 0), getDebugLoc(), Name);
2136 for (VPValue *Op : llvm::drop_begin(RangeOrContainer: operands()))
2137 C->addOperand(Operand: Op);
2138 return C;
2139 }
2140
2141 ~VPWidenPHIRecipe() override = default;
2142
2143 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2144
2145 /// Generate the phi/select nodes.
2146 void execute(VPTransformState &State) override;
2147
2148#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2149 /// Print the recipe.
2150 void print(raw_ostream &O, const Twine &Indent,
2151 VPSlotTracker &SlotTracker) const override;
2152#endif
2153};
2154
2155/// A recipe for handling first-order recurrence phis. The start value is the
2156/// first operand of the recipe and the incoming value from the backedge is the
2157/// second operand.
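/// For example, this models the recurrence in a scalar loop such as
/// (a sketch):
///   for (I = 1; I < N; I++)
///     B[I] = A[I] + A[I - 1];
/// where the phi carries the value of A[I] from one iteration to the next,
/// so it can be reused as A[I - 1].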
2158struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2159 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
2160 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2161
2162 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2163
2164 VPFirstOrderRecurrencePHIRecipe *clone() override {
2165 return new VPFirstOrderRecurrencePHIRecipe(
2166 cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0));
2167 }
2168
2169 void execute(VPTransformState &State) override;
2170
2171 /// Return the cost of this first-order recurrence phi recipe.
2172 InstructionCost computeCost(ElementCount VF,
2173 VPCostContext &Ctx) const override;
2174
2175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2176 /// Print the recipe.
2177 void print(raw_ostream &O, const Twine &Indent,
2178 VPSlotTracker &SlotTracker) const override;
2179#endif
2180
2181 /// Returns true if the recipe only uses the first lane of operand \p Op.
2182 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2183 assert(is_contained(operands(), Op) &&
2184 "Op must be an operand of the recipe");
2185 return Op == getStartValue();
2186 }
2187};
2188
2189/// A recipe for handling reduction phis. The start value is the first operand
2190/// of the recipe and the incoming value from the backedge is the second
2191/// operand.
2192class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
2193 public VPUnrollPartAccessor<2> {
2194 /// Descriptor for the reduction.
2195 const RecurrenceDescriptor &RdxDesc;
2196
2197 /// The phi is part of an in-loop reduction.
2198 bool IsInLoop;
2199
2200 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2201 bool IsOrdered;
2202
2203 /// When expanding the reduction PHI, the plan's VF element count is divided
2204 /// by this factor to form the reduction phi's VF.
2205 unsigned VFScaleFactor = 1;
2206
2207public:
2208 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2209 /// RdxDesc.
2210 VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
2211 VPValue &Start, bool IsInLoop = false,
2212 bool IsOrdered = false, unsigned VFScaleFactor = 1)
2213 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2214 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2215 VFScaleFactor(VFScaleFactor) {
2216 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2217 }
2218
2219 ~VPReductionPHIRecipe() override = default;
2220
2221 VPReductionPHIRecipe *clone() override {
2222 auto *R = new VPReductionPHIRecipe(
2223 dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), RdxDesc,
2224 *getOperand(N: 0), IsInLoop, IsOrdered, VFScaleFactor);
2225 R->addOperand(Operand: getBackedgeValue());
2226 return R;
2227 }
2228
2229 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2230
2231 /// Generate the phi/select nodes.
2232 void execute(VPTransformState &State) override;
2233
2234 /// Get the factor that the VF of this recipe's output should be scaled by.
2235 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2236
2237#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2238 /// Print the recipe.
2239 void print(raw_ostream &O, const Twine &Indent,
2240 VPSlotTracker &SlotTracker) const override;
2241#endif
2242
2243 const RecurrenceDescriptor &getRecurrenceDescriptor() const {
2244 return RdxDesc;
2245 }
2246
2247 /// Returns true, if the phi is part of an ordered reduction.
2248 bool isOrdered() const { return IsOrdered; }
2249
2250 /// Returns true, if the phi is part of an in-loop reduction.
2251 bool isInLoop() const { return IsInLoop; }
2252
2253 /// Returns true if the recipe only uses the first lane of operand \p Op.
2254 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2255 assert(is_contained(operands(), Op) &&
2256 "Op must be an operand of the recipe");
2257 return isOrdered() || isInLoop();
2258 }
2259};
2260
2261/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2262/// instructions.
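/// For example, a normalized blend of incoming values I0, I1, I2 with masks
/// M1 and M2 has operands [I0, I1, M1, I2, M2] and may be lowered to
/// (a sketch):
///   %S0 = select M1, I1, I0
///   %S1 = select M2, I2, %S0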
2263class VPBlendRecipe : public VPSingleDefRecipe {
2264public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
  /// be omitted (implied by passing an odd number of operands), in which case
  /// I0 acts as the default and all other incoming values are merged into it.
2269 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
2270 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2271 assert(Operands.size() > 0 && "Expected at least one operand!");
2272 }
2273
2274 VPBlendRecipe *clone() override {
2275 SmallVector<VPValue *> Ops(operands());
2276 return new VPBlendRecipe(cast<PHINode>(Val: getUnderlyingValue()), Ops);
2277 }
2278
2279 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2280
2281 /// A normalized blend is one that has an odd number of operands, whereby the
2282 /// first operand does not have an associated mask.
2283 bool isNormalized() const { return getNumOperands() % 2; }
2284
  /// Return the number of incoming values, taking into account that, when
  /// normalized, the first incoming value has no mask.
2287 unsigned getNumIncomingValues() const {
2288 return (getNumOperands() + isNormalized()) / 2;
2289 }
2290
2291 /// Return incoming value number \p Idx.
2292 VPValue *getIncomingValue(unsigned Idx) const {
2293 return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized());
2294 }
2295
2296 /// Return mask number \p Idx.
2297 VPValue *getMask(unsigned Idx) const {
2298 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2299 return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized());
2300 }
2301
2302 /// Generate the phi/select nodes.
2303 void execute(VPTransformState &State) override;
2304
  /// Return the cost of this VPBlendRecipe.
2306 InstructionCost computeCost(ElementCount VF,
2307 VPCostContext &Ctx) const override;
2308
2309#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2310 /// Print the recipe.
2311 void print(raw_ostream &O, const Twine &Indent,
2312 VPSlotTracker &SlotTracker) const override;
2313#endif
2314
2315 /// Returns true if the recipe only uses the first lane of operand \p Op.
2316 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2317 assert(is_contained(operands(), Op) &&
2318 "Op must be an operand of the recipe");
    // Recursion proceeds through Blend recipes only and must terminate at
    // header phis at the latest.
2321 return all_of(Range: users(),
2322 P: [this](VPUser *U) { return U->onlyFirstLaneUsed(Op: this); });
2323 }
2324};
2325
2326/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2327/// or stores into one wide load/store and shuffles. The first operand of a
2328/// VPInterleave recipe is the address, followed by the stored values, followed
2329/// by an optional mask.
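/// For example, two interleaved loads of A[2*i] and A[2*i+1] (factor 2) at
/// VF=4 may be combined into a single wide load of 8 elements, followed by
/// shufflevectors extracting the even and odd elements for the two group
/// members (a sketch).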
2330class VPInterleaveRecipe : public VPRecipeBase {
2331 const InterleaveGroup<Instruction> *IG;
2332
2333 /// Indicates if the interleave group is in a conditional block and requires a
2334 /// mask.
2335 bool HasMask = false;
2336
2337 /// Indicates if gaps between members of the group need to be masked out or if
  /// unused gaps can be loaded speculatively.
2339 bool NeedsMaskForGaps = false;
2340
2341public:
2342 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2343 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2344 bool NeedsMaskForGaps, DebugLoc DL)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
2349 // TODO: extend the masked interleaved-group support to reversed access.
2350 assert((!Mask || !IG->isReverse()) &&
2351 "Reversed masked interleave-group not supported.");
2352 for (unsigned i = 0; i < IG->getFactor(); ++i)
2353 if (Instruction *I = IG->getMember(Index: i)) {
2354 if (I->getType()->isVoidTy())
2355 continue;
2356 new VPValue(I, this);
2357 }
2358
2359 for (auto *SV : StoredValues)
2360 addOperand(Operand: SV);
2361 if (Mask) {
2362 HasMask = true;
2363 addOperand(Operand: Mask);
2364 }
2365 }
2366 ~VPInterleaveRecipe() override = default;
2367
2368 VPInterleaveRecipe *clone() override {
2369 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2370 NeedsMaskForGaps, getDebugLoc());
2371 }
2372
2373 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2374
2375 /// Return the address accessed by this recipe.
2376 VPValue *getAddr() const {
2377 return getOperand(N: 0); // Address is the 1st, mandatory operand.
2378 }
2379
2380 /// Return the mask used by this recipe. Note that a full mask is represented
2381 /// by a nullptr.
2382 VPValue *getMask() const {
2383 // Mask is optional and therefore the last, currently 2nd operand.
2384 return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr;
2385 }
2386
2387 /// Return the VPValues stored by this interleave group. If it is a load
2388 /// interleave group, return an empty ArrayRef.
2389 ArrayRef<VPValue *> getStoredValues() const {
2390 // The first operand is the address, followed by the stored values, followed
2391 // by an optional mask.
2392 return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2393 .slice(N: 1, M: getNumStoreOperands());
2394 }
2395
2396 /// Generate the wide load or store, and shuffles.
2397 void execute(VPTransformState &State) override;
2398
2399 /// Return the cost of this VPInterleaveRecipe.
2400 InstructionCost computeCost(ElementCount VF,
2401 VPCostContext &Ctx) const override;
2402
2403#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2404 /// Print the recipe.
2405 void print(raw_ostream &O, const Twine &Indent,
2406 VPSlotTracker &SlotTracker) const override;
2407#endif
2408
2409 const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2410
2411 /// Returns the number of stored operands of this interleave group. Returns 0
2412 /// for load interleave groups.
2413 unsigned getNumStoreOperands() const {
2414 return getNumOperands() - (HasMask ? 2 : 1);
2415 }
2416
2417 /// The recipe only uses the first lane of the address.
2418 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2419 assert(is_contained(operands(), Op) &&
2420 "Op must be an operand of the recipe");
2421 return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op);
2422 }
2423
2424 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2425};
2426
2427/// A recipe to represent inloop reduction operations, performing a reduction on
2428/// a vector operand into a scalar value, and adding the result to a chain.
2429/// The Operands are {ChainOp, VecOp, [Condition]}.
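/// For example, an in-loop integer add reduction at VF=4 may be lowered to
/// (a sketch):
///   %R = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %VecOp)
///   %ChainOp.next = add i32 %ChainOp, %R
/// with %VecOp first blended with the neutral value under the condition, if
/// one is present.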
2430class VPReductionRecipe : public VPRecipeWithIRFlags {
2431 /// The recurrence kind for the reduction in question.
2432 RecurKind RdxKind;
2433 bool IsOrdered;
2434 /// Whether the reduction is conditional.
2435 bool IsConditional = false;
2436
2437protected:
2438 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2439 FastMathFlags FMFs, Instruction *I,
2440 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2441 bool IsOrdered, DebugLoc DL)
2442 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2443 IsOrdered(IsOrdered) {
2444 if (CondOp) {
2445 IsConditional = true;
2446 addOperand(Operand: CondOp);
2447 }
2448 setUnderlyingValue(I);
2449 }
2450
2451public:
2452 VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
2453 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2454 bool IsOrdered, DebugLoc DL = {})
2455 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2456 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2457 IsOrdered, DL) {}
2458
2459 VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
2460 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2461 bool IsOrdered, DebugLoc DL = {})
2462 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2463 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2464 IsOrdered, DL) {}
2465
2466 ~VPReductionRecipe() override = default;
2467
2468 VPReductionRecipe *clone() override {
2469 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2470 getUnderlyingInstr(), getChainOp(), getVecOp(),
2471 getCondOp(), IsOrdered, getDebugLoc());
2472 }
2473
2474 static inline bool classof(const VPRecipeBase *R) {
2475 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2476 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2477 }
2478
2479 static inline bool classof(const VPUser *U) {
2480 auto *R = dyn_cast<VPRecipeBase>(Val: U);
2481 return R && classof(R);
2482 }
2483
2484 /// Generate the reduction in the loop.
2485 void execute(VPTransformState &State) override;
2486
2487 /// Return the cost of VPReductionRecipe.
2488 InstructionCost computeCost(ElementCount VF,
2489 VPCostContext &Ctx) const override;
2490
2491#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2492 /// Print the recipe.
2493 void print(raw_ostream &O, const Twine &Indent,
2494 VPSlotTracker &SlotTracker) const override;
2495#endif
2496
2497 /// Return the recurrence kind for the in-loop reduction.
2498 RecurKind getRecurrenceKind() const { return RdxKind; }
2499 /// Return true if the in-loop reduction is ordered.
  bool isOrdered() const { return IsOrdered; }
  /// Return true if the in-loop reduction is conditional.
  bool isConditional() const { return IsConditional; }
2503 /// The VPValue of the scalar Chain being accumulated.
2504 VPValue *getChainOp() const { return getOperand(N: 0); }
2505 /// The VPValue of the vector value to be reduced.
2506 VPValue *getVecOp() const { return getOperand(N: 1); }
2507 /// The VPValue of the condition for the block.
2508 VPValue *getCondOp() const {
2509 return isConditional() ? getOperand(N: getNumOperands() - 1) : nullptr;
2510 }
2511};
2512
2513/// A recipe for forming partial reductions. In the loop, an accumulator and
2514/// vector operand are added together and passed to the next iteration as the
2515/// next accumulator. After the loop body, the accumulator is reduced to a
2516/// scalar value.
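/// For example, with a VFScaleFactor of 4, a partial reduction may add a
/// <16 x i32> vector operand into a <4 x i32> accumulator (a sketch):
///   %Acc.next = call <4 x i32>
///       @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(
///       <4 x i32> %Acc, <16 x i32> %VecOp)
/// with the <4 x i32> accumulator reduced to a scalar only after the loop.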
2517class VPPartialReductionRecipe : public VPReductionRecipe {
2518 unsigned Opcode;
2519
  /// The factor by which the VF of this recipe's output should be divided
  /// during execution.
2522 unsigned VFScaleFactor;
2523
2524public:
2525 VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
2526 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2527 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2528 VFScaleFactor, ReductionInst) {}
2529 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2530 VPValue *Cond, unsigned ScaleFactor,
2531 Instruction *ReductionInst = nullptr)
2532 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2533 FastMathFlags(), ReductionInst,
2534 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2535 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2536 [[maybe_unused]] auto *AccumulatorRecipe =
2537 getChainOp()->getDefiningRecipe();
2538 assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2539 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2540 "Unexpected operand order for partial reduction recipe");
2541 }
2542 ~VPPartialReductionRecipe() override = default;
2543
2544 VPPartialReductionRecipe *clone() override {
2545 return new VPPartialReductionRecipe(Opcode, getOperand(N: 0), getOperand(N: 1),
2546 getCondOp(), VFScaleFactor,
2547 getUnderlyingInstr());
2548 }
2549
2550 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2551
2552 /// Generate the reduction in the loop.
2553 void execute(VPTransformState &State) override;
2554
2555 /// Return the cost of this VPPartialReductionRecipe.
2556 InstructionCost computeCost(ElementCount VF,
2557 VPCostContext &Ctx) const override;
2558
2559 /// Get the binary op's opcode.
2560 unsigned getOpcode() const { return Opcode; }
2561
2562 /// Get the factor that the VF of this recipe's output should be scaled by.
2563 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2564
2565#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2566 /// Print the recipe.
2567 void print(raw_ostream &O, const Twine &Indent,
2568 VPSlotTracker &SlotTracker) const override;
2569#endif
2570};
2571
2572/// A recipe to represent inloop reduction operations with vector-predication
2573/// intrinsics, performing a reduction on a vector operand with the explicit
2574/// vector length (EVL) into a scalar value, and adding the result to a chain.
2575/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
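/// For example, at VF=4 this may be lowered to (a sketch):
///   %R = call i32 @llvm.vp.reduce.add.v4i32(i32 %Start, <4 x i32> %VecOp,
///                                           <4 x i1> %Mask, i32 %EVL)
/// where only the first %EVL lanes of %VecOp take part in the reduction.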
2576class VPReductionEVLRecipe : public VPReductionRecipe {
2577public:
2578 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
2579 DebugLoc DL = {})
2580 : VPReductionRecipe(
2581 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2582 R.getFastMathFlags(),
2583 cast_or_null<Instruction>(Val: R.getUnderlyingValue()),
2584 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2585 R.isOrdered(), DL) {}
2586
2587 ~VPReductionEVLRecipe() override = default;
2588
2589 VPReductionEVLRecipe *clone() override {
2590 llvm_unreachable("cloning not implemented yet");
2591 }
2592
2593 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2594
2595 /// Generate the reduction in the loop
2596 void execute(VPTransformState &State) override;
2597
2598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2599 /// Print the recipe.
2600 void print(raw_ostream &O, const Twine &Indent,
2601 VPSlotTracker &SlotTracker) const override;
2602#endif
2603
2604 /// The VPValue of the explicit vector length.
2605 VPValue *getEVL() const { return getOperand(N: 2); }
2606
2607 /// Returns true if the recipe only uses the first lane of operand \p Op.
2608 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2609 assert(is_contained(operands(), Op) &&
2610 "Op must be an operand of the recipe");
2611 return Op == getEVL();
2612 }
2613};
2614
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// produce a single scalar value, only one copy, for lane zero, is generated.
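/// For example, a udiv that cannot be widened (e.g. because it may trap) is
/// replicated at VF=4 as four scalar udivs, one per lane; if it is known to
/// be a single scalar, one udiv for lane zero suffices (a sketch).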
2619class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// Indicator if only a single replica is needed instead of one per lane.
2621 bool IsSingleScalar;
2622
2623 /// Indicator if the replicas are also predicated.
2624 bool IsPredicated;
2625
2626public:
2627 VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
2628 bool IsSingleScalar, VPValue *Mask = nullptr,
2629 VPIRMetadata Metadata = {})
2630 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2631 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2632 IsPredicated(Mask) {
2633 if (Mask)
2634 addOperand(Operand: Mask);
2635 }
2636
2637 ~VPReplicateRecipe() override = default;
2638
2639 VPReplicateRecipe *clone() override {
2640 auto *Copy =
2641 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
2642 isPredicated() ? getMask() : nullptr, *this);
2643 Copy->transferFlags(Other&: *this);
2644 return Copy;
2645 }
2646
2647 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2648
2649 /// Generate replicas of the desired Ingredient. Replicas will be generated
2650 /// for all parts and lanes unless a specific part and lane are specified in
2651 /// the \p State.
2652 void execute(VPTransformState &State) override;
2653
2654 /// Return the cost of this VPReplicateRecipe.
2655 InstructionCost computeCost(ElementCount VF,
2656 VPCostContext &Ctx) const override;
2657
2658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2659 /// Print the recipe.
2660 void print(raw_ostream &O, const Twine &Indent,
2661 VPSlotTracker &SlotTracker) const override;
2662#endif
2663
2664 bool isSingleScalar() const { return IsSingleScalar; }
2665
2666 bool isPredicated() const { return IsPredicated; }
2667
2668 /// Returns true if the recipe only uses the first lane of operand \p Op.
2669 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2670 assert(is_contained(operands(), Op) &&
2671 "Op must be an operand of the recipe");
2672 return isSingleScalar();
2673 }
2674
2675 /// Returns true if the recipe uses scalars of operand \p Op.
2676 bool usesScalars(const VPValue *Op) const override {
2677 assert(is_contained(operands(), Op) &&
2678 "Op must be an operand of the recipe");
2679 return true;
2680 }
2681
2682 /// Returns true if the recipe is used by a widened recipe via an intervening
2683 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2684 /// in a vector.
2685 bool shouldPack() const;
2686
2687 /// Return the mask of a predicated VPReplicateRecipe.
2688 VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
2690 return getOperand(N: getNumOperands() - 1);
2691 }
2692
2693 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2694};
2695
2696/// A recipe for generating conditional branches on the bits of a mask.
2697class VPBranchOnMaskRecipe : public VPRecipeBase {
2698public:
2699 VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
2700 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
2701
2702 VPBranchOnMaskRecipe *clone() override {
2703 return new VPBranchOnMaskRecipe(getOperand(N: 0), getDebugLoc());
2704 }
2705
2706 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2707
2708 /// Generate the extraction of the appropriate bit from the block mask and the
2709 /// conditional branch.
2710 void execute(VPTransformState &State) override;
2711
2712 /// Return the cost of this VPBranchOnMaskRecipe.
2713 InstructionCost computeCost(ElementCount VF,
2714 VPCostContext &Ctx) const override;
2715
2716#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2717 /// Print the recipe.
2718 void print(raw_ostream &O, const Twine &Indent,
2719 VPSlotTracker &SlotTracker) const override {
2720 O << Indent << "BRANCH-ON-MASK ";
2721 printOperands(O, SlotTracker);
2722 }
2723#endif
2724
2725 /// Returns true if the recipe uses scalars of operand \p Op.
2726 bool usesScalars(const VPValue *Op) const override {
2727 assert(is_contained(operands(), Op) &&
2728 "Op must be an operand of the recipe");
2729 return true;
2730 }
2731};
2732
2733/// A recipe to combine multiple recipes into a single 'expression' recipe,
2734/// which should be considered a single entity for cost-modeling and transforms.
2735/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2736/// expression recipes, before execute. The individual expression recipes are
2737/// completely disconnected from the def-use graph of other recipes not part of
2738/// the expression. Def-use edges between pairs of expression recipes remain
2739/// intact, whereas every edge between an expression recipe and a recipe outside
2740/// the expression is elevated to connect the non-expression recipe with the
2741/// VPExpressionRecipe itself.
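/// For example, a VPWidenCastRecipe feeding a VPReductionRecipe may be
/// bundled into a single ExtendedReduction expression, so that the pair is
/// cost-modeled and transformed as one extending reduction (a sketch of one
/// use case).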
2742class VPExpressionRecipe : public VPSingleDefRecipe {
2743 /// Recipes included in this VPExpressionRecipe.
2744 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
2745
2746 /// Temporary VPValues used for external operands of the expression, i.e.
2747 /// operands not defined by recipes in the expression.
2748 SmallVector<VPValue *> LiveInPlaceholders;
2749
2750 enum class ExpressionTypes {
2751 /// Represents an inloop extended reduction operation, performing a
2752 /// reduction on an extended vector operand into a scalar value, and adding
2753 /// the result to a chain.
2754 ExtendedReduction,
    /// Represents an inloop multiply-accumulate reduction, multiplying the
2756 /// extended vector operands, performing a reduction.add on the result, and
2757 /// adding the scalar result to a chain.
2758 ExtMulAccReduction,
    /// Represents an inloop multiply-accumulate reduction, multiplying the
2760 /// vector operands, performing a reduction.add on the result, and adding
2761 /// the scalar result to a chain.
2762 MulAccReduction,
2763 };
2764
2765 /// Type of the expression.
2766 ExpressionTypes ExpressionType;
2767
2768 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
2769 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
2770 /// in the expression) are replaced by temporary VPValues and the original
2771 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
2772 /// as needed (excluding last) to ensure they are only used by other recipes
2773 /// in the expression.
2774 VPExpressionRecipe(ExpressionTypes ExpressionType,
2775 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
2776
2777public:
2778 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
2779 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
2780 VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
2781 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
2782 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2783 VPWidenRecipe *Mul, VPReductionRecipe *Red)
2784 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
2785 {Ext0, Ext1, Mul, Red}) {}
2786
2787 ~VPExpressionRecipe() override {
2788 for (auto *R : reverse(C&: ExpressionRecipes))
2789 delete R;
2790 for (VPValue *T : LiveInPlaceholders)
2791 delete T;
2792 }
2793
2794 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
2795
2796 VPExpressionRecipe *clone() override {
2797 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
    SmallVector<VPSingleDefRecipe *> NewExpressionRecipes;
    for (auto *R : ExpressionRecipes)
      NewExpressionRecipes.push_back(Elt: R->clone());
    for (auto *New : NewExpressionRecipes) {
      for (const auto &[Idx, Old] : enumerate(First&: ExpressionRecipes))
        New->replaceUsesOfWith(From: Old, To: NewExpressionRecipes[Idx]);
      // Update placeholder operands in the cloned recipe to use the external
      // operands, to be internalized when the cloned expression is constructed.
      for (const auto &[Placeholder, OutsideOp] :
           zip(t&: LiveInPlaceholders, u: operands()))
        New->replaceUsesOfWith(From: Placeholder, To: OutsideOp);
    }
    return new VPExpressionRecipe(ExpressionType, NewExpressionRecipes);
2811 }
2812
2813 /// Return the VPValue to use to infer the result type of the recipe.
2814 VPValue *getOperandOfResultType() const {
2815 unsigned OpIdx =
2816 cast<VPReductionRecipe>(Val: ExpressionRecipes.back())->isConditional() ? 2
2817 : 1;
2818 return getOperand(N: getNumOperands() - OpIdx);
2819 }
2820
2821 /// Insert the recipes of the expression back into the VPlan, directly before
2822 /// the current recipe. Leaves the expression recipe empty, which must be
2823 /// removed before codegen.
2824 void decompose();
2825
2826 /// Method for generating code, must not be called as this recipe is abstract.
2827 void execute(VPTransformState &State) override {
2828 llvm_unreachable("recipe must be removed before execute");
2829 }
2830
2831 InstructionCost computeCost(ElementCount VF,
2832 VPCostContext &Ctx) const override;
2833
2834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2835 /// Print the recipe.
2836 void print(raw_ostream &O, const Twine &Indent,
2837 VPSlotTracker &SlotTracker) const override;
2838#endif
2839
2840 /// Returns true if this expression contains recipes that may read from or
2841 /// write to memory.
2842 bool mayReadOrWriteMemory() const;
2843
2844 /// Returns true if this expression contains recipes that may have side
2845 /// effects.
2846 bool mayHaveSideEffects() const;
2847};
2848
2849/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2850/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2851/// order to merge values that are set under such a branch and feed their uses.
2852/// The phi nodes can be scalar or vector depending on the users of the value.
2853/// This recipe works in concert with VPBranchOnMaskRecipe.
2854class VPPredInstPHIRecipe : public VPSingleDefRecipe {
2855public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
2858 VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
2859 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2860 ~VPPredInstPHIRecipe() override = default;
2861
2862 VPPredInstPHIRecipe *clone() override {
2863 return new VPPredInstPHIRecipe(getOperand(N: 0), getDebugLoc());
2864 }
2865
2866 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2867
2868 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2869 /// retain SSA form.
2870 void execute(VPTransformState &State) override;
2871
2872 /// Return the cost of this VPPredInstPHIRecipe.
2873 InstructionCost computeCost(ElementCount VF,
2874 VPCostContext &Ctx) const override {
2875 // TODO: Compute accurate cost after retiring the legacy cost model.
2876 return 0;
2877 }
2878
2879#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2880 /// Print the recipe.
2881 void print(raw_ostream &O, const Twine &Indent,
2882 VPSlotTracker &SlotTracker) const override;
2883#endif
2884
2885 /// Returns true if the recipe uses scalars of operand \p Op.
2886 bool usesScalars(const VPValue *Op) const override {
2887 assert(is_contained(operands(), Op) &&
2888 "Op must be an operand of the recipe");
2889 return true;
2890 }
2891};
2892
2893/// A common base class for widening memory operations. An optional mask can be
2894/// provided as the last operand.
2895class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
2896protected:
2897 Instruction &Ingredient;
2898
2899 /// Whether the accessed addresses are consecutive.
2900 bool Consecutive;
2901
2902 /// Whether the consecutive accessed addresses are in reverse order.
2903 bool Reverse;
2904
2905 /// Whether the memory access is masked.
2906 bool IsMasked = false;
2907
2908 void setMask(VPValue *Mask) {
2909 assert(!IsMasked && "cannot re-set mask");
2910 if (!Mask)
2911 return;
2912 addOperand(Operand: Mask);
2913 IsMasked = true;
2914 }
2915
  VPWidenMemoryRecipe(const unsigned char SC, Instruction &I,
2917 std::initializer_list<VPValue *> Operands,
2918 bool Consecutive, bool Reverse,
2919 const VPIRMetadata &Metadata, DebugLoc DL)
2920 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
2921 Consecutive(Consecutive), Reverse(Reverse) {
2922 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2923 }
2924
2925public:
2926 VPWidenMemoryRecipe *clone() override {
2927 llvm_unreachable("cloning not supported");
2928 }
2929
2930 static inline bool classof(const VPRecipeBase *R) {
2931 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2932 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2933 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2934 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2935 }
2936
2937 static inline bool classof(const VPUser *U) {
2938 auto *R = dyn_cast<VPRecipeBase>(Val: U);
2939 return R && classof(R);
2940 }
2941
2942 /// Return whether the loaded-from / stored-to addresses are consecutive.
2943 bool isConsecutive() const { return Consecutive; }
2944
2945 /// Return whether the consecutive loaded/stored addresses are in reverse
2946 /// order.
2947 bool isReverse() const { return Reverse; }
2948
2949 /// Return the address accessed by this recipe.
2950 VPValue *getAddr() const { return getOperand(N: 0); }
2951
2952 /// Returns true if the recipe is masked.
2953 bool isMasked() const { return IsMasked; }
2954
2955 /// Return the mask used by this recipe. Note that a full mask is represented
2956 /// by a nullptr.
2957 VPValue *getMask() const {
2958 // Mask is optional and therefore the last operand.
2959 return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
2960 }
2961
2962 /// Generate the wide load/store.
2963 void execute(VPTransformState &State) override {
2964 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2965 }
2966
2967 /// Return the cost of this VPWidenMemoryRecipe.
2968 InstructionCost computeCost(ElementCount VF,
2969 VPCostContext &Ctx) const override;
2970
2971 Instruction &getIngredient() const { return Ingredient; }
2972};
2973
2974/// A recipe for widening load operations, using the address to load from and an
2975/// optional mask.
2976struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2977 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2978 bool Consecutive, bool Reverse,
2979 const VPIRMetadata &Metadata, DebugLoc DL)
2980 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2981 Reverse, Metadata, DL),
2982 VPValue(this, &Load) {
2983 setMask(Mask);
2984 }
2985
2986 VPWidenLoadRecipe *clone() override {
2987 return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(),
2988 getMask(), Consecutive, Reverse, *this,
2989 getDebugLoc());
2990 }
2991
2992 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2993
2994 /// Generate a wide load or gather.
2995 void execute(VPTransformState &State) override;
2996
2997#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2998 /// Print the recipe.
2999 void print(raw_ostream &O, const Twine &Indent,
3000 VPSlotTracker &SlotTracker) const override;
3001#endif
3002
3003 /// Returns true if the recipe only uses the first lane of operand \p Op.
3004 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3005 assert(is_contained(operands(), Op) &&
3006 "Op must be an operand of the recipe");
3007 // Widened, consecutive load operations only demand the first lane of
3008 // their address.
3009 return Op == getAddr() && isConsecutive();
3010 }
3011};
3012
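// A hedged construction sketch for the recipe above; Load (a LoadInst *),
// Addr (a VPValue *) and Metadata (a VPIRMetadata) are assumed to exist in
// the surrounding code and are not defined in this header:
//
//   auto *LoadR = new VPWidenLoadRecipe(*Load, Addr, /*Mask=*/nullptr,
//                                       /*Consecutive=*/true,
//                                       /*Reverse=*/false, Metadata,
//                                       Load->getDebugLoc());
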
3013/// A recipe for widening load operations with vector-predication intrinsics,
3014/// using the address to load from, the explicit vector length and an optional
3015/// mask.
3016struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3017 VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
3018 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3019 {L.getAddr(), &EVL}, L.isConsecutive(),
3020 L.isReverse(), L, L.getDebugLoc()),
3021 VPValue(this, &getIngredient()) {
3022 setMask(Mask);
3023 }
3024
3025 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3026
3027 /// Return the EVL operand.
3028 VPValue *getEVL() const { return getOperand(N: 1); }
3029
3030 /// Generate the wide load or gather.
3031 void execute(VPTransformState &State) override;
3032
3033 /// Return the cost of this VPWidenLoadEVLRecipe.
3034 InstructionCost computeCost(ElementCount VF,
3035 VPCostContext &Ctx) const override;
3036
3037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3038 /// Print the recipe.
3039 void print(raw_ostream &O, const Twine &Indent,
3040 VPSlotTracker &SlotTracker) const override;
3041#endif
3042
3043 /// Returns true if the recipe only uses the first lane of operand \p Op.
3044 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3045 assert(is_contained(operands(), Op) &&
3046 "Op must be an operand of the recipe");
3047 // Widened loads only demand the first lane of EVL and consecutive loads
3048 // only demand the first lane of their address.
3049 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3050 }
3051};
3052
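// Sketch of how a plain widened load is typically rewrapped into its EVL
// (vector-predication) form; L (a VPWidenLoadRecipe *) and EVL (a VPValue *)
// are assumed, and insertBefore/replaceAllUsesWith/eraseFromParent are
// inherited helpers declared elsewhere (VPRecipeBase/VPValue):
//
//   auto *EVLLoad = new VPWidenLoadEVLRecipe(*L, *EVL, L->getMask());
//   EVLLoad->insertBefore(L);
//   L->replaceAllUsesWith(EVLLoad);
//   L->eraseFromParent();
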
3053/// A recipe for widening store operations, using the stored value, the address
3054/// to store to and an optional mask.
3055struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
3056 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3057 VPValue *Mask, bool Consecutive, bool Reverse,
3058 const VPIRMetadata &Metadata, DebugLoc DL)
3059 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3060 Consecutive, Reverse, Metadata, DL) {
3061 setMask(Mask);
3062 }
3063
3064 VPWidenStoreRecipe *clone() override {
3065 return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(),
3066 getStoredValue(), getMask(), Consecutive,
3067 Reverse, *this, getDebugLoc());
3068 }
3069
3070 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3071
3072 /// Return the value stored by this recipe.
3073 VPValue *getStoredValue() const { return getOperand(N: 1); }
3074
3075 /// Generate a wide store or scatter.
3076 void execute(VPTransformState &State) override;
3077
3078#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3079 /// Print the recipe.
3080 void print(raw_ostream &O, const Twine &Indent,
3081 VPSlotTracker &SlotTracker) const override;
3082#endif
3083
3084 /// Returns true if the recipe only uses the first lane of operand \p Op.
3085 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3086 assert(is_contained(operands(), Op) &&
3087 "Op must be an operand of the recipe");
3088 // Widened, consecutive stores only demand the first lane of their address,
3089 // unless the same operand is also stored.
3090 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3091 }
3092};
3093
3094/// A recipe for widening store operations with vector-predication intrinsics,
3095/// using the value to store, the address to store to, the explicit vector
3096/// length and an optional mask.
3097struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
3098 VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
3099 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3100 {S.getAddr(), S.getStoredValue(), &EVL},
3101 S.isConsecutive(), S.isReverse(), S,
3102 S.getDebugLoc()) {
3103 setMask(Mask);
3104 }
3105
3106 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3107
3108 /// Return the value stored by this recipe.
3109 VPValue *getStoredValue() const { return getOperand(N: 1); }
3110
3111 /// Return the EVL operand.
3112 VPValue *getEVL() const { return getOperand(N: 2); }
3113
3114 /// Generate the wide store or scatter.
3115 void execute(VPTransformState &State) override;
3116
3117 /// Return the cost of this VPWidenStoreEVLRecipe.
3118 InstructionCost computeCost(ElementCount VF,
3119 VPCostContext &Ctx) const override;
3120
3121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3122 /// Print the recipe.
3123 void print(raw_ostream &O, const Twine &Indent,
3124 VPSlotTracker &SlotTracker) const override;
3125#endif
3126
3127 /// Returns true if the recipe only uses the first lane of operand \p Op.
3128 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3129 assert(is_contained(operands(), Op) &&
3130 "Op must be an operand of the recipe");
3131 if (Op == getEVL()) {
3132 assert(getStoredValue() != Op && "unexpected store of EVL");
3133 return true;
3134 }
3135 // Widened, consecutive memory operations only demand the first lane of
3136 // their address, unless the same operand is also stored. The latter can
3137 // happen with opaque pointers.
3138 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3139 }
3140};
3141
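// Operand layout of the four widened memory recipes above; the optional mask,
// when present, is always the last operand (appended by setMask()):
//
//   VPWidenLoadRecipe:     {Addr            [, Mask]}
//   VPWidenLoadEVLRecipe:  {Addr, EVL       [, Mask]}
//   VPWidenStoreRecipe:    {Addr, StoredVal [, Mask]}
//   VPWidenStoreEVLRecipe: {Addr, StoredVal, EVL [, Mask]}
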
3142/// Recipe to expand a SCEV expression.
3143class VPExpandSCEVRecipe : public VPSingleDefRecipe {
3144 const SCEV *Expr;
3145 ScalarEvolution &SE;
3146
3147public:
3148 VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
3149 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3150
3151 ~VPExpandSCEVRecipe() override = default;
3152
3153 VPExpandSCEVRecipe *clone() override {
3154 return new VPExpandSCEVRecipe(Expr, SE);
3155 }
3156
3157 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3158
3159 /// Expand the SCEV expression into an IR value.
3160 void execute(VPTransformState &State) override;
3161
3162 /// Return the cost of this VPExpandSCEVRecipe.
3163 InstructionCost computeCost(ElementCount VF,
3164 VPCostContext &Ctx) const override {
3165 // TODO: Compute accurate cost after retiring the legacy cost model.
3166 return 0;
3167 }
3168
3169#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3170 /// Print the recipe.
3171 void print(raw_ostream &O, const Twine &Indent,
3172 VPSlotTracker &SlotTracker) const override;
3173#endif
3174
3175 const SCEV *getSCEV() const { return Expr; }
3176};
3177
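// Hedged usage sketch for the recipe above, memoizing expansions via
// VPlan::getSCEVExpansion/addSCEVExpansion (declared further down in this
// file); Plan, Expr, SE and the preheader block PH are assumed to exist:
//
//   VPValue *Expanded = Plan.getSCEVExpansion(Expr);
//   if (!Expanded) {
//     auto *ExpR = new VPExpandSCEVRecipe(Expr, SE);
//     PH->appendRecipe(ExpR);
//     Plan.addSCEVExpansion(Expr, ExpR);
//     Expanded = ExpR;
//   }
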
3178/// Canonical scalar induction phi of the vector loop, starting at the specified
3179/// start value (either 0 or the resume value when vectorizing the epilogue
3180/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3181/// canonical induction variable.
3182class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
3183public:
3184 VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
3185 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3186
3187 ~VPCanonicalIVPHIRecipe() override = default;
3188
3189 VPCanonicalIVPHIRecipe *clone() override {
3190 auto *R = new VPCanonicalIVPHIRecipe(getOperand(N: 0), getDebugLoc());
3191 R->addOperand(Operand: getBackedgeValue());
3192 return R;
3193 }
3194
3195 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3196
3197 void execute(VPTransformState &State) override {
3198 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3199 "scalar phi recipe");
3200 }
3201
3202#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3203 /// Print the recipe.
3204 void print(raw_ostream &O, const Twine &Indent,
3205 VPSlotTracker &SlotTracker) const override;
3206#endif
3207
3208 /// Returns the scalar type of the induction.
3209 Type *getScalarType() const {
3210 return getStartValue()->getLiveInIRValue()->getType();
3211 }
3212
3213 /// Returns true if the recipe only uses the first lane of operand \p Op.
3214 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3215 assert(is_contained(operands(), Op) &&
3216 "Op must be an operand of the recipe");
3217 return true;
3218 }
3219
3220 /// Returns true if the recipe only uses the first part of operand \p Op.
3221 bool onlyFirstPartUsed(const VPValue *Op) const override {
3222 assert(is_contained(operands(), Op) &&
3223 "Op must be an operand of the recipe");
3224 return true;
3225 }
3226
3227 /// Return the cost of this VPCanonicalIVPHIRecipe.
3228 InstructionCost computeCost(ElementCount VF,
3229 VPCostContext &Ctx) const override {
3230 // For now, match the behavior of the legacy cost model.
3231 return 0;
3232 }
3233};
3234
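// A minimal creation sketch for the canonical IV phi above; Plan, the region
// header block HeaderVPBB, the IV type CanIVTy and a debug location DL are
// assumed. The phi is inserted at the start of the header, respecting the
// phis-before-non-phis ordering of VPBasicBlocks:
//
//   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0));
//   auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, DL);
//   HeaderVPBB->insert(CanIV, HeaderVPBB->begin());
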
3235/// A recipe for generating the active lane mask for the vector loop that is
3236/// used to predicate the vector operations.
3237/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3238/// remove VPActiveLaneMaskPHIRecipe.
3239class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
3240public:
3241 VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
3242 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3243 DL) {}
3244
3245 ~VPActiveLaneMaskPHIRecipe() override = default;
3246
3247 VPActiveLaneMaskPHIRecipe *clone() override {
3248 auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc());
3249 if (getNumOperands() == 2)
3250 R->addOperand(Operand: getOperand(N: 1));
3251 return R;
3252 }
3253
3254 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3255
3256 /// Generate the active lane mask phi of the vector loop.
3257 void execute(VPTransformState &State) override;
3258
3259#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3260 /// Print the recipe.
3261 void print(raw_ostream &O, const Twine &Indent,
3262 VPSlotTracker &SlotTracker) const override;
3263#endif
3264};
3265
3266/// A recipe for generating the phi node for the current index of elements,
3267/// adjusted in accordance with the EVL value. It starts at the start value of the
3268/// canonical induction and gets incremented by EVL in each iteration of the
3269/// vector loop.
3270class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
3271public:
3272 VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
3273 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3274
3275 ~VPEVLBasedIVPHIRecipe() override = default;
3276
3277 VPEVLBasedIVPHIRecipe *clone() override {
3278 llvm_unreachable("cloning not implemented yet");
3279 }
3280
3281 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3282
3283 void execute(VPTransformState &State) override {
3284 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3285 "scalar phi recipe");
3286 }
3287
3288 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3289 InstructionCost computeCost(ElementCount VF,
3290 VPCostContext &Ctx) const override {
3291 // For now, match the behavior of the legacy cost model.
3292 return 0;
3293 }
3294
3295 /// Returns true if the recipe only uses the first lane of operand \p Op.
3296 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3297 assert(is_contained(operands(), Op) &&
3298 "Op must be an operand of the recipe");
3299 return true;
3300 }
3301
3302#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3303 /// Print the recipe.
3304 void print(raw_ostream &O, const Twine &Indent,
3305 VPSlotTracker &SlotTracker) const override;
3306#endif
3307};
3308
3309/// A Recipe for widening the canonical induction variable of the vector loop.
3310class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
3311 public VPUnrollPartAccessor<1> {
3312public:
3313 VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
3314 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3315
3316 ~VPWidenCanonicalIVRecipe() override = default;
3317
3318 VPWidenCanonicalIVRecipe *clone() override {
3319 return new VPWidenCanonicalIVRecipe(
3320 cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0)));
3321 }
3322
3323 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3324
3325 /// Generate a canonical vector induction variable of the vector loop, with
3326 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3327 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3328 void execute(VPTransformState &State) override;
3329
3330 /// Return the cost of this VPWidenCanonicalIVRecipe.
3331 InstructionCost computeCost(ElementCount VF,
3332 VPCostContext &Ctx) const override {
3333 // TODO: Compute accurate cost after retiring the legacy cost model.
3334 return 0;
3335 }
3336
3337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3338 /// Print the recipe.
3339 void print(raw_ostream &O, const Twine &Indent,
3340 VPSlotTracker &SlotTracker) const override;
3341#endif
3342};
3343
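// Worked example for the widened canonical IV above: with VF = 4 and UF = 2,
// the two generated parts start at <0,1,2,3> and <4,5,6,7> (lane value
// Part*VF + lane), and each part is stepped by <8,8,8,8>, i.e. by VF*UF, per
// vector iteration.
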
3344/// A recipe for converting the input value \p IV to the corresponding
3345/// value of an IV with different start and step values, using Start + IV *
3346/// Step.
3347class VPDerivedIVRecipe : public VPSingleDefRecipe {
3348 /// Kind of the induction.
3349 const InductionDescriptor::InductionKind Kind;
3350 /// If not nullptr, the floating point induction binary operator. Must be set
3351 /// for floating point inductions.
3352 const FPMathOperator *FPBinOp;
3353
3354 /// Name to use for the generated IR instruction for the derived IV.
3355 std::string Name;
3356
3357public:
3358 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
3359 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3360 const Twine &Name = "")
3361 : VPDerivedIVRecipe(
3362 IndDesc.getKind(),
3363 dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()),
3364 Start, CanonicalIV, Step, Name) {}
3365
3366 VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
3367 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3368 VPValue *Step, const Twine &Name = "")
3369 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3370 FPBinOp(FPBinOp), Name(Name.str()) {}
3371
3372 ~VPDerivedIVRecipe() override = default;
3373
3374 VPDerivedIVRecipe *clone() override {
3375 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1),
3376 getStepValue());
3377 }
3378
3379 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3380
3381 /// Generate the transformed value of the induction at offset StartValue (1st
3382 /// operand) + IV (2nd operand) * StepValue (3rd operand).
3383 void execute(VPTransformState &State) override;
3384
3385 /// Return the cost of this VPDerivedIVRecipe.
3386 InstructionCost computeCost(ElementCount VF,
3387 VPCostContext &Ctx) const override {
3388 // TODO: Compute accurate cost after retiring the legacy cost model.
3389 return 0;
3390 }
3391
3392#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3393 /// Print the recipe.
3394 void print(raw_ostream &O, const Twine &Indent,
3395 VPSlotTracker &SlotTracker) const override;
3396#endif
3397
3398 Type *getScalarType() const {
3399 return getStartValue()->getLiveInIRValue()->getType();
3400 }
3401
3402 VPValue *getStartValue() const { return getOperand(N: 0); }
3403 VPValue *getStepValue() const { return getOperand(N: 2); }
3404
3405 /// Returns true if the recipe only uses the first lane of operand \p Op.
3406 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3407 assert(is_contained(operands(), Op) &&
3408 "Op must be an operand of the recipe");
3409 return true;
3410 }
3411};
3412
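// Worked example for the derivation above: an IV with Start = 100 and
// Step = -2 maps canonical IV values 0, 1, 2, ... to 100, 98, 96, ... via
// DerivedIV = Start + CanonicalIV * Step.
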
3413/// A recipe for handling phi nodes of integer and floating-point inductions,
3414/// producing their scalar values.
3415class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
3416 public VPUnrollPartAccessor<3> {
3417 Instruction::BinaryOps InductionOpcode;
3418
3419public:
3420 VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
3421 Instruction::BinaryOps Opcode, FastMathFlags FMFs,
3422 DebugLoc DL)
3423 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3424 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3425 InductionOpcode(Opcode) {}
3426
3427 VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
3428 VPValue *Step, VPValue *VF, DebugLoc DL = {})
3429 : VPScalarIVStepsRecipe(
3430 IV, Step, VF, IndDesc.getInductionOpcode(),
3431 dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp())
3432 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3433 : FastMathFlags(),
3434 DL) {}
3435
3436 ~VPScalarIVStepsRecipe() override = default;
3437
3438 VPScalarIVStepsRecipe *clone() override {
3439 return new VPScalarIVStepsRecipe(
3440 getOperand(N: 0), getOperand(N: 1), getOperand(N: 2), InductionOpcode,
3441 hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
3442 getDebugLoc());
3443 }
3444
3445 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3446 /// this is only accurate after the VPlan has been unrolled.
3447 bool isPart0() { return getUnrollPart(U&: *this) == 0; }
3448
3449 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3450
3451 /// Generate the scalarized versions of the phi node as needed by its users.
3452 void execute(VPTransformState &State) override;
3453
3454 /// Return the cost of this VPScalarIVStepsRecipe.
3455 InstructionCost computeCost(ElementCount VF,
3456 VPCostContext &Ctx) const override {
3457 // TODO: Compute accurate cost after retiring the legacy cost model.
3458 return 0;
3459 }
3460
3461#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3462 /// Print the recipe.
3463 void print(raw_ostream &O, const Twine &Indent,
3464 VPSlotTracker &SlotTracker) const override;
3465#endif
3466
3467 VPValue *getStepValue() const { return getOperand(N: 1); }
3468
3469 /// Returns true if the recipe only uses the first lane of operand \p Op.
3470 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3471 assert(is_contained(operands(), Op) &&
3472 "Op must be an operand of the recipe");
3473 return true;
3474 }
3475};
3476
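// Worked example for the scalar steps above: for an integer IV with Step = 4
// and VF = 4, part 0 produces the per-lane scalar values 0, 4, 8 and 12
// (lane * Step added to the part's IV value), as needed by replicated
// (scalar) users of the induction.
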
3477/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3478/// types implementing VPPhiAccessors. Used by isa<> & co.
3479template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
3480 static inline bool isPossible(const VPRecipeBase *f) {
3481 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3482 return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(Val: f);
3483 }
3484};
3485/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3486/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3487template <typename SrcTy>
3488struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3489
3490 using Self = CastInfo<VPPhiAccessors, SrcTy>;
3491
3492 /// doCast is used by cast<>.
3493 static inline VPPhiAccessors *doCast(SrcTy R) {
3494 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3495 switch (R->getVPDefID()) {
3496 case VPDef::VPInstructionSC:
3497 return cast<VPPhi>(R);
3498 case VPDef::VPIRInstructionSC:
3499 return cast<VPIRPhi>(R);
3500 case VPDef::VPWidenPHISC:
3501 return cast<VPWidenPHIRecipe>(R);
3502 default:
3503 return cast<VPHeaderPHIRecipe>(R);
3504 }
3505 }());
3506 }
3507
3508 /// doCastIfPossible is used by dyn_cast<>.
3509 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3510 if (!Self::isPossible(f))
3511 return nullptr;
3512 return doCast(R: f);
3513 }
3514};
3515template <>
3516struct CastInfo<VPPhiAccessors, VPRecipeBase *>
3517 : CastInfoVPPhiAccessors<VPRecipeBase *> {};
3518template <>
3519struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
3520 : CastInfoVPPhiAccessors<const VPRecipeBase *> {};
3521
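// With the specializations above, generic code can query any phi-like recipe
// uniformly; a minimal sketch, where R is an assumed VPRecipeBase pointer:
//
//   if (auto *PhiR = dyn_cast<VPPhiAccessors>(R)) {
//     const VPBasicBlock *FirstPred = PhiR->getIncomingBlock(0);
//     (void)FirstPred;
//   }
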
3522/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3523/// holds a sequence of zero or more VPRecipes, each representing a sequence of
3524/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3525class VPBasicBlock : public VPBlockBase {
3526 friend class VPlan;
3527
3528 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3529 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3530 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3531 if (Recipe)
3532 appendRecipe(Recipe);
3533 }
3534
3535public:
3536 using RecipeListTy = iplist<VPRecipeBase>;
3537
3538protected:
3539 /// The VPRecipes held in the order of output instructions to generate.
3540 RecipeListTy Recipes;
3541
3542 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3543 : VPBlockBase(BlockSC, Name.str()) {}
3544
3545public:
3546 ~VPBasicBlock() override {
3547 while (!Recipes.empty())
3548 Recipes.pop_back();
3549 }
3550
3551 /// Instruction iterators...
3552 using iterator = RecipeListTy::iterator;
3553 using const_iterator = RecipeListTy::const_iterator;
3554 using reverse_iterator = RecipeListTy::reverse_iterator;
3555 using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
3556
3557 //===--------------------------------------------------------------------===//
3558 /// Recipe iterator methods
3559 ///
3560 inline iterator begin() { return Recipes.begin(); }
3561 inline const_iterator begin() const { return Recipes.begin(); }
3562 inline iterator end() { return Recipes.end(); }
3563 inline const_iterator end() const { return Recipes.end(); }
3564
3565 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3566 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3567 inline reverse_iterator rend() { return Recipes.rend(); }
3568 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3569
3570 inline size_t size() const { return Recipes.size(); }
3571 inline bool empty() const { return Recipes.empty(); }
3572 inline const VPRecipeBase &front() const { return Recipes.front(); }
3573 inline VPRecipeBase &front() { return Recipes.front(); }
3574 inline const VPRecipeBase &back() const { return Recipes.back(); }
3575 inline VPRecipeBase &back() { return Recipes.back(); }
3576
3577 /// Returns a reference to the list of recipes.
3578 RecipeListTy &getRecipeList() { return Recipes; }
3579
3580 /// Returns a pointer to a member of the recipe list.
3581 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
3582 return &VPBasicBlock::Recipes;
3583 }
3584
3585 /// Method to support type inquiry through isa, cast, and dyn_cast.
3586 static inline bool classof(const VPBlockBase *V) {
3587 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3588 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3589 }
3590
3591 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3592 assert(Recipe && "No recipe to append.");
3593 assert(!Recipe->Parent && "Recipe already in VPlan");
3594 Recipe->Parent = this;
3595 Recipes.insert(where: InsertPt, New: Recipe);
3596 }
3597
3598 /// Augment the existing recipes of a VPBasicBlock with an additional
3599 /// \p Recipe as the last recipe.
3600 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, InsertPt: end()); }
3601
3602 /// The method which generates the output IR instructions that correspond to
3603 /// this VPBasicBlock, thereby "executing" the VPlan.
3604 void execute(VPTransformState *State) override;
3605
3606 /// Return the cost of this VPBasicBlock.
3607 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3608
3609 /// Return the position of the first non-phi node recipe in the block.
3610 iterator getFirstNonPhi();
3611
3612 /// Returns an iterator range over the PHI-like recipes in the block.
3613 iterator_range<iterator> phis() {
3614 return make_range(x: begin(), y: getFirstNonPhi());
3615 }
3616
3617 /// Split current block at \p SplitAt by inserting a new block between the
3618 /// current block and its successors and moving all recipes starting at
3619 /// SplitAt to the new block. Returns the new block.
3620 VPBasicBlock *splitAt(iterator SplitAt);
3621
3622 VPRegionBlock *getEnclosingLoopRegion();
3623 const VPRegionBlock *getEnclosingLoopRegion() const;
3624
3625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3626 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3627 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
3628 ///
3629 /// Note that the numbering is applied to the whole VPlan, so printing
3630 /// individual blocks is consistent with the whole VPlan printing.
3631 void print(raw_ostream &O, const Twine &Indent,
3632 VPSlotTracker &SlotTracker) const override;
3633 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3634#endif
3635
3636 /// If the block has multiple successors, return the branch recipe terminating
3637 /// the block. If the block has zero or one successor, return nullptr.
3638 VPRecipeBase *getTerminator();
3639 const VPRecipeBase *getTerminator() const;
3640
3641 /// Returns true if the block is exiting its parent region.
3642 bool isExiting() const;
3643
3644 /// Clone the current block and its recipes, without updating the operands of
3645 /// the cloned recipes.
3646 VPBasicBlock *clone() override;
3647
3648 /// Returns the predecessor block at index \p Idx with the predecessors as per
3649 /// the corresponding plain CFG. If the block is an entry block to a region,
3650 /// the first predecessor is the single predecessor of the region, and the
3651 /// second predecessor is the exiting block of the region.
3652 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
3653
3654protected:
3655 /// Execute the recipes in the IR basic block \p BB.
3656 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3657
3658 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3659 /// generated for this VPBB.
3660 void connectToPredecessors(VPTransformState &State);
3661
3662private:
3663 /// Create an IR BasicBlock to hold the output instructions generated by this
3664 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3665 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
3666};
3667
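// A minimal iteration sketch over a VPBasicBlock VPBB, using the iterators
// and the phis() helper declared above; recipes are kept in output order,
// with all phi-like recipes first:
//
//   for (VPRecipeBase &PhiR : VPBB->phis())
//     ; // visit only the phi-like recipes at the top of the block
//   for (VPRecipeBase &R : *VPBB)
//     ; // visit every recipe, in the order it will generate IR
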
3668inline const VPBasicBlock *
3669VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
3670 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
3671}
3672
3673/// A special type of VPBasicBlock that wraps an existing IR basic block.
3674/// Recipes of the block get added before the first non-phi instruction in the
3675/// wrapped block.
3676/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3677/// preheader block.
3678class VPIRBasicBlock : public VPBasicBlock {
3679 friend class VPlan;
3680
3681 BasicBlock *IRBB;
3682
3683 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
3684 VPIRBasicBlock(BasicBlock *IRBB)
3685 : VPBasicBlock(VPIRBasicBlockSC,
3686 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3687 IRBB(IRBB) {}
3688
3689public:
3690 ~VPIRBasicBlock() override {}
3691
3692 static inline bool classof(const VPBlockBase *V) {
3693 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3694 }
3695
3696 /// The method which generates the output IR instructions that correspond to
3697 /// this VPBasicBlock, thereby "executing" the VPlan.
3698 void execute(VPTransformState *State) override;
3699
3700 VPIRBasicBlock *clone() override;
3701
3702 BasicBlock *getIRBasicBlock() const { return IRBB; }
3703};
3704
3705/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3706/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3707/// A VPRegionBlock may indicate that its contents are to be replicated several
3708/// times. This is designed to support predicated scalarization, in which a
3709/// scalar if-then code structure needs to be generated VF * UF times. Having
3710/// this replication indicator helps to keep a single model for multiple
3711/// candidate VF's. The actual replication takes place only once the desired VF
3712/// and UF have been determined.
3713class VPRegionBlock : public VPBlockBase {
3714 friend class VPlan;
3715
3716 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3717 VPBlockBase *Entry;
3718
3719 /// Hold the Single Exiting block of the SESE region modelled by the
3720 /// VPRegionBlock.
3721 VPBlockBase *Exiting;
3722
3723 /// An indicator whether this region is to generate multiple replicated
3724 /// instances of output IR corresponding to its VPBlockBases.
3725 bool IsReplicator;
3726
3727 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
3728 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
3729 const std::string &Name = "", bool IsReplicator = false)
3730 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3731 IsReplicator(IsReplicator) {
3732 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3733 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3734 Entry->setParent(this);
3735 Exiting->setParent(this);
3736 }
3737 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3738 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3739 IsReplicator(IsReplicator) {}
3740
3741public:
3742 ~VPRegionBlock() override {}
3743
3744 /// Method to support type inquiry through isa, cast, and dyn_cast.
3745 static inline bool classof(const VPBlockBase *V) {
3746 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3747 }
3748
3749 const VPBlockBase *getEntry() const { return Entry; }
3750 VPBlockBase *getEntry() { return Entry; }
3751
3752 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3753 /// EntryBlock must have no predecessors.
3754 void setEntry(VPBlockBase *EntryBlock) {
3755 assert(EntryBlock->getPredecessors().empty() &&
3756 "Entry block cannot have predecessors.");
3757 Entry = EntryBlock;
3758 EntryBlock->setParent(this);
3759 }
3760
3761 const VPBlockBase *getExiting() const { return Exiting; }
3762 VPBlockBase *getExiting() { return Exiting; }
3763
3764 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3765 /// ExitingBlock must have no successors.
3766 void setExiting(VPBlockBase *ExitingBlock) {
3767 assert(ExitingBlock->getSuccessors().empty() &&
3768 "Exit block cannot have successors.");
3769 Exiting = ExitingBlock;
3770 ExitingBlock->setParent(this);
3771 }
3772
3773 /// Returns the pre-header VPBasicBlock of the loop region.
3774 VPBasicBlock *getPreheaderVPBB() {
3775 assert(!isReplicator() && "should only get pre-header of loop regions");
3776 return getSinglePredecessor()->getExitingBasicBlock();
3777 }
3778
3779 /// An indicator whether this region is to generate multiple replicated
3780 /// instances of output IR corresponding to its VPBlockBases.
3781 bool isReplicator() const { return IsReplicator; }
3782
3783 /// The method which generates the output IR instructions that correspond to
3784 /// this VPRegionBlock, thereby "executing" the VPlan.
3785 void execute(VPTransformState *State) override;
3786
3787 // Return the cost of this region.
3788 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3789
3790#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3791 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3792 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
3793 /// consecutive numbers.
3794 ///
3795 /// Note that the numbering is applied to the whole VPlan, so printing
3796 /// individual regions is consistent with the whole VPlan printing.
3797 void print(raw_ostream &O, const Twine &Indent,
3798 VPSlotTracker &SlotTracker) const override;
3799 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3800#endif
3801
3802 /// Clone all blocks in the single-entry single-exit region of the block and
3803 /// their recipes without updating the operands of the cloned recipes.
3804 VPRegionBlock *clone() override;
3805
3806 /// Remove the current region from its VPlan, connecting its predecessor to
3807 /// its entry, and its exiting block to its successor.
3808 void dissolveToCFGLoop();
3809};
3810
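// A hedged construction sketch for a non-replicating loop region, using the
// factory declared in class VPlan below; Plan, HeaderVPBB and LatchVPBB are
// assumed to exist and to already be connected inside the region:
//
//   VPRegionBlock *LoopRegion =
//       Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
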
3811/// VPlan models a candidate for vectorization, encoding various decisions taken
3812/// to produce efficient output IR, including which branches, basic-blocks and
3813/// output IR instructions to generate, and their cost. VPlan holds a
3814/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3815/// VPBasicBlock.
3816class VPlan {
3817 friend class VPlanPrinter;
3818 friend class VPSlotTracker;
3819
3820 /// VPBasicBlock corresponding to the original preheader. Used to place
3821 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3822 /// rest of VPlan execution.
3823 /// When this VPlan is used for the epilogue vector loop, the entry will be
3824 /// replaced by a new entry block created during skeleton creation.
3825 VPBasicBlock *Entry;
3826
3827 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3828 VPIRBasicBlock *ScalarHeader;
3829
3830 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
3831 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
3832 /// e.g. if the scalar epilogue always executes.
3833 SmallVector<VPIRBasicBlock *, 2> ExitBlocks;
3834
3835 /// Holds the VFs applicable to this VPlan.
3836 SmallSetVector<ElementCount, 2> VFs;
3837
3838 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3839 /// any UF.
3840 SmallSetVector<unsigned, 2> UFs;
3841
3842 /// Holds the name of the VPlan, for printing.
3843 std::string Name;
3844
3845 /// Represents the trip count of the original loop, for folding
3846 /// the tail.
3847 VPValue *TripCount = nullptr;
3848
3849 /// Represents the backedge taken count of the original loop, for folding
3850 /// the tail. It equals TripCount - 1.
3851 VPValue *BackedgeTakenCount = nullptr;
3852
3853 /// Represents the vector trip count.
3854 VPValue VectorTripCount;
3855
3856 /// Represents the vectorization factor of the loop.
3857 VPValue VF;
3858
3859 /// Represents the loop-invariant VF * UF of the vector loop region.
3860 VPValue VFxUF;
3861
3862 /// Holds a mapping between Values and their corresponding VPValue inside
3863 /// VPlan.
3864 Value2VPValueTy Value2VPValue;
3865
3866 /// Contains all the external definitions created for this VPlan. External
3867 /// definitions are VPValues that hold a pointer to their underlying IR.
3868 SmallVector<VPValue *, 16> VPLiveIns;
3869
3870 /// Mapping from SCEVs to the VPValues representing their expansions.
3871 /// NOTE: This mapping is temporary and will be removed once all users have
3872 /// been modeled in VPlan directly.
3873 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3874
3875 /// Blocks allocated and owned by the VPlan. They will be deleted once the
3876 /// VPlan is destroyed.
3877 SmallVector<VPBlockBase *> CreatedBlocks;
3878
3879 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3880 /// wrapping the original header of the scalar loop.
3881 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3882 : Entry(Entry), ScalarHeader(ScalarHeader) {
3883 Entry->setPlan(this);
3884 assert(ScalarHeader->getNumSuccessors() == 0 &&
3885 "scalar header must be a leaf node");
3886 }
3887
3888public:
3889 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3890 /// original preheader and scalar header of \p L, to be used as entry and
3891 /// scalar header blocks of the new VPlan.
3892 VPlan(Loop *L);
3893
3894 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3895 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3896 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3897 setEntry(createVPBasicBlock(Name: "preheader"));
3898 ScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderBB);
3899 TripCount = TC;
3900 }
3901
3902 ~VPlan();
3903
3904 void setEntry(VPBasicBlock *VPBB) {
3905 Entry = VPBB;
3906 VPBB->setPlan(this);
3907 }
3908
3909 /// Prepare the plan for execution, setting up the required live-in values.
3910 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3911 VPTransformState &State);
3912
3913 /// Generate the IR code for this VPlan.
3914 void execute(VPTransformState *State);
3915
3916 /// Return the cost of this plan.
3917 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
3918
3919 VPBasicBlock *getEntry() { return Entry; }
3920 const VPBasicBlock *getEntry() const { return Entry; }
3921
3922 /// Returns the preheader of the vector loop region, if one exists, or null
3923 /// otherwise.
3924 VPBasicBlock *getVectorPreheader() {
3925 VPRegionBlock *VectorRegion = getVectorLoopRegion();
3926 return VectorRegion
3927 ? cast<VPBasicBlock>(Val: VectorRegion->getSinglePredecessor())
3928 : nullptr;
3929 }
3930
3931 /// Returns the VPRegionBlock of the vector loop.
3932 VPRegionBlock *getVectorLoopRegion();
3933 const VPRegionBlock *getVectorLoopRegion() const;
3934
3935 /// Returns the 'middle' block of the plan, that is the block that selects
3936 /// whether to execute the scalar tail loop or the exit block from the loop
3937 /// latch. If there is an early exit from the vector loop, the middle block
3938 /// conceptually has the early exit block as a third successor, split across
3939 /// two VPBBs. In that case, the second VPBB selects whether to execute the
3940 /// scalar tail loop or the exit block. If the scalar tail loop or exit block
3941 /// is known to always execute, the middle block may branch directly to that
3942 /// block. This function cannot be called once the vector loop region has been
3943 /// removed.
3944 VPBasicBlock *getMiddleBlock() {
3945 VPRegionBlock *LoopRegion = getVectorLoopRegion();
3946 assert(
3947 LoopRegion &&
3948 "cannot call the function after vector loop region has been removed");
3949 auto *RegionSucc = cast<VPBasicBlock>(Val: LoopRegion->getSingleSuccessor());
3950 if (RegionSucc->getSingleSuccessor() ||
3951 is_contained(Range&: RegionSucc->getSuccessors(), Element: getScalarPreheader()))
3952 return RegionSucc;
3953 // There is an early exit. The successor of RegionSucc is the middle block.
3954 return cast<VPBasicBlock>(Val: RegionSucc->getSuccessors()[1]);
3955 }
3956
3957 const VPBasicBlock *getMiddleBlock() const {
3958 return const_cast<VPlan *>(this)->getMiddleBlock();
3959 }
3960
3961 /// Return the VPBasicBlock for the preheader of the scalar loop.
3962 VPBasicBlock *getScalarPreheader() const {
3963 return cast<VPBasicBlock>(Val: getScalarHeader()->getSinglePredecessor());
3964 }
3965
3966 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3967 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3968
3969 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
3970 /// the original scalar loop.
3971 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
3972
3973 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
3974 /// exit block.
3975 VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const;
3976
3977 /// Returns true if \p VPBB is an exit block.
3978 bool isExitBlock(VPBlockBase *VPBB);
3979
3980 /// The trip count of the original loop.
3981 VPValue *getTripCount() const {
3982 assert(TripCount && "trip count needs to be set before accessing it");
3983 return TripCount;
3984 }
3985
3986 /// Set the trip count, assuming it is currently null; if it is not, use
3987 /// resetTripCount().
3988 void setTripCount(VPValue *NewTripCount) {
3989 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
3990 TripCount = NewTripCount;
3991 }
3992
3993 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3994 /// the original trip count have been replaced.
3995 void resetTripCount(VPValue *NewTripCount) {
3996 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3997 "TripCount must be set when resetting");
3998 TripCount = NewTripCount;
3999 }
4000
4001 /// The backedge taken count of the original loop.
4002 VPValue *getOrCreateBackedgeTakenCount() {
4003 if (!BackedgeTakenCount)
4004 BackedgeTakenCount = new VPValue();
4005 return BackedgeTakenCount;
4006 }
4007
4008 /// The vector trip count.
4009 VPValue &getVectorTripCount() { return VectorTripCount; }
4010
4011 /// Returns the VF of the vector loop region.
4012 VPValue &getVF() { return VF; }
4013
4014 /// Returns VF * UF of the vector loop region.
4015 VPValue &getVFxUF() { return VFxUF; }
4016
4017 void addVF(ElementCount VF) { VFs.insert(X: VF); }
4018
4019 void setVF(ElementCount VF) {
4020 assert(hasVF(VF) && "Cannot set VF not already in plan");
4021 VFs.clear();
4022 VFs.insert(X: VF);
4023 }
4024
4025 bool hasVF(ElementCount VF) const { return VFs.count(key: VF); }
4026 bool hasScalableVF() const {
4027 return any_of(Range: VFs, P: [](ElementCount VF) { return VF.isScalable(); });
4028 }
4029
4030 /// Returns an iterator range over all VFs of the plan.
4031 iterator_range<SmallSetVector<ElementCount, 2>::iterator>
4032 vectorFactors() const {
4033 return {VFs.begin(), VFs.end()};
4034 }
4035
4036 bool hasScalarVFOnly() const {
4037 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4038 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4039 "Plan with scalar VF should only have a single VF");
4040 return HasScalarVFOnly;
4041 }
4042
4043 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(key: UF); }
4044
4045 unsigned getUF() const {
4046 assert(UFs.size() == 1 && "Expected a single UF");
4047 return UFs[0];
4048 }
4049
4050 void setUF(unsigned UF) {
4051 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4052 UFs.clear();
4053 UFs.insert(X: UF);
4054 }
4055
4056 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4057 /// concrete UF.
4058 bool isUnrolled() const { return UFs.size() == 1; }
4059
4060 /// Return a string with the name of the plan and the applicable VFs and UFs.
4061 std::string getName() const;
4062
4063 void setName(const Twine &newName) { Name = newName.str(); }
4064
4065 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4066 /// yet) for \p V.
4067 VPValue *getOrAddLiveIn(Value *V) {
4068 assert(V && "Trying to get or add the VPValue of a null Value");
4069 auto [It, Inserted] = Value2VPValue.try_emplace(Key: V);
4070 if (Inserted) {
4071 VPValue *VPV = new VPValue(V);
4072 VPLiveIns.push_back(Elt: VPV);
4073 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4074 It->second = VPV;
4075 }
4076
4077 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4078 return It->second;
4079 }
4080
4081 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4082 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(Val: V); }
4083
4084 /// Return the list of live-in VPValues available in the VPlan.
4085 ArrayRef<VPValue *> getLiveIns() const {
4086 assert(all_of(Value2VPValue,
4087 [this](const auto &P) {
4088 return is_contained(VPLiveIns, P.second);
4089 }) &&
4090 "all VPValues in Value2VPValue must also be in VPLiveIns");
4091 return VPLiveIns;
4092 }
4093
4094#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4095 /// Print the live-ins of this VPlan to \p O.
4096 void printLiveIns(raw_ostream &O) const;
4097
4098 /// Print this VPlan to \p O.
4099 void print(raw_ostream &O) const;
4100
4101 /// Print this VPlan in DOT format to \p O.
4102 void printDOT(raw_ostream &O) const;
4103
4104 /// Dump the plan to stderr (for debugging).
4105 LLVM_DUMP_METHOD void dump() const;
4106#endif
4107
4108 /// Returns the canonical induction recipe of the vector loop.
4109 VPCanonicalIVPHIRecipe *getCanonicalIV() {
4110 VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
4111 if (EntryVPBB->empty()) {
4112 // VPlan native path.
4113 EntryVPBB = cast<VPBasicBlock>(Val: EntryVPBB->getSingleSuccessor());
4114 }
4115 return cast<VPCanonicalIVPHIRecipe>(Val: &*EntryVPBB->begin());
4116 }
4117
4118 VPValue *getSCEVExpansion(const SCEV *S) const {
4119 return SCEVToExpansion.lookup(Val: S);
4120 }
4121
4122 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4123 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4124 SCEVToExpansion[S] = V;
4125 }
4126
4127 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4128 /// recipes to refer to the clones, and return it.
4129 VPlan *duplicate();
4130
4131 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4132 /// present. The returned block is owned by the VPlan and deleted once the
4133 /// VPlan is destroyed.
4134 VPBasicBlock *createVPBasicBlock(const Twine &Name,
4135 VPRecipeBase *Recipe = nullptr) {
4136 auto *VPB = new VPBasicBlock(Name, Recipe);
4137 CreatedBlocks.push_back(Elt: VPB);
4138 return VPB;
4139 }
4140
4141 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4142 /// IsReplicator is true, the region is a replicate region. The returned block
4143 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4144 VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4145 const std::string &Name = "",
4146 bool IsReplicator = false) {
4147 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4148 CreatedBlocks.push_back(Elt: VPB);
4149 return VPB;
4150 }
4151
4152 /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
4153 /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
4154 /// The returned block is owned by the VPlan and deleted once the VPlan is
4155 /// destroyed.
4156 VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
4157 bool IsReplicator = false) {
4158 auto *VPB = new VPRegionBlock(Name, IsReplicator);
4159 CreatedBlocks.push_back(Elt: VPB);
4160 return VPB;
4161 }
4162
4163 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4164 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4165 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4166 VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);
4167
4168 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4169 /// instructions in \p IRBB, except its terminator which is managed by the
4170 /// successors of the block in VPlan. The returned block is owned by the VPlan
4171 /// and deleted once the VPlan is destroyed.
4172 VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);
4173
4174 /// Returns true if the VPlan is based on a loop with an early exit. That is
4175 /// the case if the VPlan has either more than one exit block or a single exit
4176 /// block with multiple predecessors (one for the exit via the latch and one
4177 /// for the early exit).
4178 bool hasEarlyExit() const {
4179 return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1;
4180 }
4181
4182 /// Returns true if the scalar tail may execute after the vector loop. Note
4183 /// that this relies on unneeded branches to the scalar tail loop being
4184 /// removed.
4185 bool hasScalarTail() const {
4186 return !(getScalarPreheader()->getNumPredecessors() == 0 ||
4187 getScalarPreheader()->getSinglePredecessor() == getEntry());
4188 }
4189};
4190
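// End-to-end sketch of the typical lifecycle of a VPlan, in the order the
// members above are meant to be used; Plan, CostCtx and State are assumed to
// be set up by the loop vectorizer:
//
//   Plan->addVF(ElementCount::getFixed(4));
//   InstructionCost C = Plan->cost(ElementCount::getFixed(4), CostCtx);
//   Plan->setVF(ElementCount::getFixed(4));    // commit the chosen VF
//   Plan->setUF(2);                            // commit the chosen UF
//   Plan->execute(&State);                     // emit the output IR
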
4191#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4192inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4193 Plan.print(OS);
4194 return OS;
4195}
4196#endif
4197
4198} // end namespace llvm
4199
4200#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
4201