//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
/// VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
/// within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
/// also inherit from VPValue.
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
23
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanValue.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstructionCost.h"
#include <cassert>
#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
54class InnerLoopVectorizer;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
58class RecurrenceDescriptor;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
67class VPReplicateRecipe;
68class VPlanSlp;
69class Value;
70class LoopVectorizationCostModel;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78using VPlanPtr = std::unique_ptr<VPlan>;
79
80/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
81/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82class LLVM_ABI_FOR_TEST VPBlockBase {
83 friend class VPBlockUtils;
84
85 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
86
87 /// An optional name for the block.
88 std::string Name;
89
90 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
91 /// it is a topmost VPBlockBase.
92 VPRegionBlock *Parent = nullptr;
93
94 /// List of predecessor blocks.
95 SmallVector<VPBlockBase *, 1> Predecessors;
96
97 /// List of successor blocks.
98 SmallVector<VPBlockBase *, 1> Successors;
99
100 /// VPlan containing the block. Can only be set on the entry block of the
101 /// plan.
102 VPlan *Plan = nullptr;
103
104 /// Add \p Successor as the last successor to this block.
105 void appendSuccessor(VPBlockBase *Successor) {
106 assert(Successor && "Cannot add nullptr successor!");
107 Successors.push_back(Elt: Successor);
108 }
109
110 /// Add \p Predecessor as the last predecessor to this block.
111 void appendPredecessor(VPBlockBase *Predecessor) {
112 assert(Predecessor && "Cannot add nullptr predecessor!");
113 Predecessors.push_back(Elt: Predecessor);
114 }
115
116 /// Remove \p Predecessor from the predecessors of this block.
117 void removePredecessor(VPBlockBase *Predecessor) {
118 auto Pos = find(Range&: Predecessors, Val: Predecessor);
119 assert(Pos && "Predecessor does not exist");
120 Predecessors.erase(CI: Pos);
121 }
122
123 /// Remove \p Successor from the successors of this block.
124 void removeSuccessor(VPBlockBase *Successor) {
125 auto Pos = find(Range&: Successors, Val: Successor);
126 assert(Pos && "Successor does not exist");
127 Successors.erase(CI: Pos);
128 }
129
130 /// This function replaces one predecessor with another, useful when
131 /// trying to replace an old block in the CFG with a new one.
132 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
133 auto I = find(Range&: Predecessors, Val: Old);
134 assert(I != Predecessors.end());
135 assert(Old->getParent() == New->getParent() &&
136 "replaced predecessor must have the same parent");
137 *I = New;
138 }
139
140 /// This function replaces one successor with another, useful when
141 /// trying to replace an old block in the CFG with a new one.
142 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
143 auto I = find(Range&: Successors, Val: Old);
144 assert(I != Successors.end());
145 assert(Old->getParent() == New->getParent() &&
146 "replaced successor must have the same parent");
147 *I = New;
148 }
149
150protected:
151 VPBlockBase(const unsigned char SC, const std::string &N)
152 : SubclassID(SC), Name(N) {}
153
154public:
155 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
156 /// that are actually instantiated. Values of this enumeration are kept in the
157 /// SubclassID field of the VPBlockBase objects. They are used for concrete
158 /// type identification.
159 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
160
161 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
162
163 virtual ~VPBlockBase() = default;
164
165 const std::string &getName() const { return Name; }
166
167 void setName(const Twine &newName) { Name = newName.str(); }
168
169 /// \return an ID for the concrete type of this object.
170 /// This is used to implement the classof checks. This should not be used
171 /// for any other purpose, as the values may change as LLVM evolves.
172 unsigned getVPBlockID() const { return SubclassID; }
173
174 VPRegionBlock *getParent() { return Parent; }
175 const VPRegionBlock *getParent() const { return Parent; }
176
177 /// \return A pointer to the plan containing the current block.
178 VPlan *getPlan();
179 const VPlan *getPlan() const;
180
181 /// Sets the pointer of the plan containing the block. The block must be the
182 /// entry block into the VPlan.
183 void setPlan(VPlan *ParentPlan);
184
185 void setParent(VPRegionBlock *P) { Parent = P; }
186
187 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
188 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
189 /// VPBlockBase is a VPBasicBlock, it is returned.
190 const VPBasicBlock *getEntryBasicBlock() const;
191 VPBasicBlock *getEntryBasicBlock();
192
193 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
194 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
195 /// VPBlockBase is a VPBasicBlock, it is returned.
196 const VPBasicBlock *getExitingBasicBlock() const;
197 VPBasicBlock *getExitingBasicBlock();
198
199 const VPBlocksTy &getSuccessors() const { return Successors; }
200 VPBlocksTy &getSuccessors() { return Successors; }
201
202 /// Returns true if this block has any successors.
203 bool hasSuccessors() const { return !Successors.empty(); }
204 /// Returns true if this block has any predecessors.
205 bool hasPredecessors() const { return !Predecessors.empty(); }
206
207 iterator_range<VPBlockBase **> successors() { return Successors; }
208 iterator_range<VPBlockBase **> predecessors() { return Predecessors; }
209
210 const VPBlocksTy &getPredecessors() const { return Predecessors; }
211 VPBlocksTy &getPredecessors() { return Predecessors; }
212
213 /// \return the successor of this VPBlockBase if it has a single successor.
214 /// Otherwise return a null pointer.
215 VPBlockBase *getSingleSuccessor() const {
216 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
217 }
218
219 /// \return the predecessor of this VPBlockBase if it has a single
220 /// predecessor. Otherwise return a null pointer.
221 VPBlockBase *getSinglePredecessor() const {
222 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
223 }
224
225 size_t getNumSuccessors() const { return Successors.size(); }
226 size_t getNumPredecessors() const { return Predecessors.size(); }
227
228 /// An Enclosing Block of a block B is any block containing B, including B
229 /// itself. \return the closest enclosing block starting from "this", which
230 /// has successors. \return the root enclosing block if all enclosing blocks
231 /// have no successors.
232 VPBlockBase *getEnclosingBlockWithSuccessors();
233
234 /// \return the closest enclosing block starting from "this", which has
235 /// predecessors. \return the root enclosing block if all enclosing blocks
236 /// have no predecessors.
237 VPBlockBase *getEnclosingBlockWithPredecessors();
238
239 /// \return the successors either attached directly to this VPBlockBase or, if
240 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
241 /// successors of its own, search recursively for the first enclosing
242 /// VPRegionBlock that has successors and return them. If no such
243 /// VPRegionBlock exists, return the (empty) successors of the topmost
244 /// VPBlockBase reached.
245 const VPBlocksTy &getHierarchicalSuccessors() {
246 return getEnclosingBlockWithSuccessors()->getSuccessors();
247 }
248
249 /// \return the hierarchical successor of this VPBlockBase if it has a single
250 /// hierarchical successor. Otherwise return a null pointer.
251 VPBlockBase *getSingleHierarchicalSuccessor() {
252 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
253 }
254
255 /// \return the predecessors either attached directly to this VPBlockBase or,
256 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
257 /// predecessors of its own, search recursively for the first enclosing
258 /// VPRegionBlock that has predecessors and return them. If no such
259 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
260 /// VPBlockBase reached.
261 const VPBlocksTy &getHierarchicalPredecessors() {
262 return getEnclosingBlockWithPredecessors()->getPredecessors();
263 }
264
265 /// \return the hierarchical predecessor of this VPBlockBase if it has a
266 /// single hierarchical predecessor. Otherwise return a null pointer.
267 VPBlockBase *getSingleHierarchicalPredecessor() {
268 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
269 }
270
271 /// Set a given VPBlockBase \p Successor as the single successor of this
272 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
273 /// This VPBlockBase must have no successors.
274 void setOneSuccessor(VPBlockBase *Successor) {
275 assert(Successors.empty() && "Setting one successor when others exist.");
276 assert(Successor->getParent() == getParent() &&
277 "connected blocks must have the same parent");
278 appendSuccessor(Successor);
279 }
280
281 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
282 /// successors of this VPBlockBase. This VPBlockBase is not added as
283 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
284 /// successors.
285 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
286 assert(Successors.empty() && "Setting two successors when others exist.");
287 appendSuccessor(Successor: IfTrue);
288 appendSuccessor(Successor: IfFalse);
289 }
290
291 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
292 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
293 /// as successor of any VPBasicBlock in \p NewPreds.
294 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
295 assert(Predecessors.empty() && "Block predecessors already set.");
296 for (auto *Pred : NewPreds)
297 appendPredecessor(Predecessor: Pred);
298 }
299
300 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
301 /// This VPBlockBase must have no successors. This VPBlockBase is not added
302 /// as predecessor of any VPBasicBlock in \p NewSuccs.
303 void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
304 assert(Successors.empty() && "Block successors already set.");
305 for (auto *Succ : NewSuccs)
306 appendSuccessor(Successor: Succ);
307 }
308
309 /// Remove all the predecessor of this block.
310 void clearPredecessors() { Predecessors.clear(); }
311
312 /// Remove all the successors of this block.
313 void clearSuccessors() { Successors.clear(); }
314
315 /// Swap predecessors of the block. The block must have exactly 2
316 /// predecessors.
317 void swapPredecessors() {
318 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
319 std::swap(a&: Predecessors[0], b&: Predecessors[1]);
320 }
321
322 /// Swap successors of the block. The block must have exactly 2 successors.
323 // TODO: This should be part of introducing conditional branch recipes rather
324 // than being independent.
325 void swapSuccessors() {
326 assert(Successors.size() == 2 && "must have 2 successors to swap");
327 std::swap(a&: Successors[0], b&: Successors[1]);
328 }
329
330 /// Returns the index for \p Pred in the blocks predecessors list.
331 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
332 assert(count(Predecessors, Pred) == 1 &&
333 "must have Pred exactly once in Predecessors");
334 return std::distance(first: Predecessors.begin(), last: find(Range: Predecessors, Val: Pred));
335 }
336
337 /// Returns the index for \p Succ in the blocks successor list.
338 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
339 assert(count(Successors, Succ) == 1 &&
340 "must have Succ exactly once in Successors");
341 return std::distance(first: Successors.begin(), last: find(Range: Successors, Val: Succ));
342 }
343
344 /// The method which generates the output IR that correspond to this
345 /// VPBlockBase, thereby "executing" the VPlan.
346 virtual void execute(VPTransformState *State) = 0;
347
348 /// Return the cost of the block.
349 virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
350
351#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
352 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
353 OS << getName();
354 }
355
356 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
357 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
358 /// consequtive numbers.
359 ///
360 /// Note that the numbering is applied to the whole VPlan, so printing
361 /// individual blocks is consistent with the whole VPlan printing.
362 virtual void print(raw_ostream &O, const Twine &Indent,
363 VPSlotTracker &SlotTracker) const = 0;
364
365 /// Print plain-text dump of this VPlan to \p O.
366 void print(raw_ostream &O) const;
367
368 /// Print the successors of this block to \p O, prefixing all lines with \p
369 /// Indent.
370 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
371
372 /// Dump this VPBlockBase to dbgs().
373 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
374#endif
375
376 /// Clone the current block and it's recipes without updating the operands of
377 /// the cloned recipes, including all blocks in the single-entry single-exit
378 /// region for VPRegionBlocks.
379 virtual VPBlockBase *clone() = 0;
380};
381
382/// VPRecipeBase is a base class modeling a sequence of one or more output IR
383/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
384/// and is responsible for deleting its defined values. Single-value
385/// recipes must inherit from VPSingleDef instead of inheriting from both
386/// VPRecipeBase and VPValue separately.
387class LLVM_ABI_FOR_TEST VPRecipeBase
388 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
389 public VPDef,
390 public VPUser {
391 friend VPBasicBlock;
392 friend class VPBlockUtils;
393
394 /// Subclass identifier (for isa/dyn_cast).
395 const unsigned char SubclassID;
396
397 /// Each VPRecipe belongs to a single VPBasicBlock.
398 VPBasicBlock *Parent = nullptr;
399
400 /// The debug location for the recipe.
401 DebugLoc DL;
402
403public:
404 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
405 /// that is actually instantiated. Values of this enumeration are kept in the
406 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
407 /// type identification.
408 using VPRecipeTy = enum {
409 VPBranchOnMaskSC,
410 VPDerivedIVSC,
411 VPExpandSCEVSC,
412 VPExpressionSC,
413 VPIRInstructionSC,
414 VPInstructionSC,
415 VPInterleaveEVLSC,
416 VPInterleaveSC,
417 VPReductionEVLSC,
418 VPReductionSC,
419 VPReplicateSC,
420 VPScalarIVStepsSC,
421 VPVectorPointerSC,
422 VPVectorEndPointerSC,
423 VPWidenCallSC,
424 VPWidenCanonicalIVSC,
425 VPWidenCastSC,
426 VPWidenGEPSC,
427 VPWidenIntrinsicSC,
428 VPWidenLoadEVLSC,
429 VPWidenLoadSC,
430 VPWidenStoreEVLSC,
431 VPWidenStoreSC,
432 VPWidenSC,
433 VPBlendSC,
434 VPHistogramSC,
435 // START: Phi-like recipes. Need to be kept together.
436 VPWidenPHISC,
437 VPPredInstPHISC,
438 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
439 // VPHeaderPHIRecipe need to be kept together.
440 VPCanonicalIVPHISC,
441 VPCurrentIterationPHISC,
442 VPActiveLaneMaskPHISC,
443 VPFirstOrderRecurrencePHISC,
444 VPWidenIntOrFpInductionSC,
445 VPWidenPointerInductionSC,
446 VPReductionPHISC,
447 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
448 // END: Phi-like recipes
449 VPFirstPHISC = VPWidenPHISC,
450 VPFirstHeaderPHISC = VPCanonicalIVPHISC,
451 VPLastHeaderPHISC = VPReductionPHISC,
452 VPLastPHISC = VPReductionPHISC,
453 };
454
455 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
456 DebugLoc DL = DebugLoc::getUnknown())
457 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
458
459 ~VPRecipeBase() override = default;
460
461 /// Clone the current recipe.
462 virtual VPRecipeBase *clone() = 0;
463
464 /// \return the VPBasicBlock which this VPRecipe belongs to.
465 VPBasicBlock *getParent() { return Parent; }
466 const VPBasicBlock *getParent() const { return Parent; }
467
468 /// \return the VPRegionBlock which the recipe belongs to.
469 VPRegionBlock *getRegion();
470 const VPRegionBlock *getRegion() const;
471
472 /// The method which generates the output IR instructions that correspond to
473 /// this VPRecipe, thereby "executing" the VPlan.
474 virtual void execute(VPTransformState &State) = 0;
475
476 /// Return the cost of this recipe, taking into account if the cost
477 /// computation should be skipped and the ForceTargetInstructionCost flag.
478 /// Also takes care of printing the cost for debugging.
479 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
480
481 /// Insert an unlinked recipe into a basic block immediately before
482 /// the specified recipe.
483 void insertBefore(VPRecipeBase *InsertPos);
484 /// Insert an unlinked recipe into \p BB immediately before the insertion
485 /// point \p IP;
486 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
487
488 /// Insert an unlinked Recipe into a basic block immediately after
489 /// the specified Recipe.
490 void insertAfter(VPRecipeBase *InsertPos);
491
492 /// Unlink this recipe from its current VPBasicBlock and insert it into
493 /// the VPBasicBlock that MovePos lives in, right after MovePos.
494 void moveAfter(VPRecipeBase *MovePos);
495
496 /// Unlink this recipe and insert into BB before I.
497 ///
498 /// \pre I is a valid iterator into BB.
499 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
500
501 /// This method unlinks 'this' from the containing basic block, but does not
502 /// delete it.
503 void removeFromParent();
504
505 /// This method unlinks 'this' from the containing basic block and deletes it.
506 ///
507 /// \returns an iterator pointing to the element after the erased one
508 iplist<VPRecipeBase>::iterator eraseFromParent();
509
510 /// \return an ID for the concrete type of this object.
511 unsigned getVPRecipeID() const { return SubclassID; }
512
513 /// Method to support type inquiry through isa, cast, and dyn_cast.
514 static inline bool classof(const VPDef *D) {
515 // All VPDefs are also VPRecipeBases.
516 return true;
517 }
518
519 static inline bool classof(const VPUser *U) { return true; }
520
521 /// Returns true if the recipe may have side-effects.
522 bool mayHaveSideEffects() const;
523
524 /// Returns true for PHI-like recipes.
525 bool isPhi() const;
526
527 /// Returns true if the recipe may read from memory.
528 bool mayReadFromMemory() const;
529
530 /// Returns true if the recipe may write to memory.
531 bool mayWriteToMemory() const;
532
533 /// Returns true if the recipe may read from or write to memory.
534 bool mayReadOrWriteMemory() const {
535 return mayReadFromMemory() || mayWriteToMemory();
536 }
537
538 /// Returns the debug location of the recipe.
539 DebugLoc getDebugLoc() const { return DL; }
540
541 /// Return true if the recipe is a scalar cast.
542 bool isScalarCast() const;
543
544 /// Set the recipe's debug location to \p NewDL.
545 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
546
547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
548 /// Dump the recipe to stderr (for debugging).
549 LLVM_ABI_FOR_TEST void dump() const;
550
551 /// Print the recipe, delegating to printRecipe().
552 void print(raw_ostream &O, const Twine &Indent,
553 VPSlotTracker &SlotTracker) const;
554#endif
555
556protected:
557 /// Compute the cost of this recipe either using a recipe's specialized
558 /// implementation or using the legacy cost model and the underlying
559 /// instructions.
560 virtual InstructionCost computeCost(ElementCount VF,
561 VPCostContext &Ctx) const;
562
563#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
564 /// Each concrete VPRecipe prints itself, without printing common information,
565 /// like debug info or metadata.
566 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
567 VPSlotTracker &SlotTracker) const = 0;
568#endif
569};
570
// Helper macro to define common classof implementations for recipes. It
// provides the standard isa/cast/dyn_cast hooks for a recipe identified by a
// single VPRecipeTy value, covering lookups starting from VPRecipeBase,
// VPValue (via its defining recipe), VPUser, and VPSingleDefRecipe.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
587
588/// VPSingleDef is a base class for recipes for modeling a sequence of one or
589/// more output IR that define a single result VPValue.
590/// Note that VPRecipeBase must be inherited from before VPValue.
591class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue {
592public:
593 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
594 DebugLoc DL = DebugLoc::getUnknown())
595 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
596
597 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
598 Value *UV, DebugLoc DL = DebugLoc::getUnknown())
599 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
600
601 static inline bool classof(const VPRecipeBase *R) {
602 switch (R->getVPRecipeID()) {
603 case VPRecipeBase::VPDerivedIVSC:
604 case VPRecipeBase::VPExpandSCEVSC:
605 case VPRecipeBase::VPExpressionSC:
606 case VPRecipeBase::VPInstructionSC:
607 case VPRecipeBase::VPReductionEVLSC:
608 case VPRecipeBase::VPReductionSC:
609 case VPRecipeBase::VPReplicateSC:
610 case VPRecipeBase::VPScalarIVStepsSC:
611 case VPRecipeBase::VPVectorPointerSC:
612 case VPRecipeBase::VPVectorEndPointerSC:
613 case VPRecipeBase::VPWidenCallSC:
614 case VPRecipeBase::VPWidenCanonicalIVSC:
615 case VPRecipeBase::VPWidenCastSC:
616 case VPRecipeBase::VPWidenGEPSC:
617 case VPRecipeBase::VPWidenIntrinsicSC:
618 case VPRecipeBase::VPWidenSC:
619 case VPRecipeBase::VPBlendSC:
620 case VPRecipeBase::VPPredInstPHISC:
621 case VPRecipeBase::VPCanonicalIVPHISC:
622 case VPRecipeBase::VPCurrentIterationPHISC:
623 case VPRecipeBase::VPActiveLaneMaskPHISC:
624 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
625 case VPRecipeBase::VPWidenPHISC:
626 case VPRecipeBase::VPWidenIntOrFpInductionSC:
627 case VPRecipeBase::VPWidenPointerInductionSC:
628 case VPRecipeBase::VPReductionPHISC:
629 return true;
630 case VPRecipeBase::VPBranchOnMaskSC:
631 case VPRecipeBase::VPInterleaveEVLSC:
632 case VPRecipeBase::VPInterleaveSC:
633 case VPRecipeBase::VPIRInstructionSC:
634 case VPRecipeBase::VPWidenLoadEVLSC:
635 case VPRecipeBase::VPWidenLoadSC:
636 case VPRecipeBase::VPWidenStoreEVLSC:
637 case VPRecipeBase::VPWidenStoreSC:
638 case VPRecipeBase::VPHistogramSC:
639 // TODO: Widened stores don't define a value, but widened loads do. Split
640 // the recipes to be able to make widened loads VPSingleDefRecipes.
641 return false;
642 }
643 llvm_unreachable("Unhandled VPRecipeID");
644 }
645
646 static inline bool classof(const VPValue *V) {
647 auto *R = V->getDefiningRecipe();
648 return R && classof(R);
649 }
650
651 static inline bool classof(const VPUser *U) {
652 auto *R = dyn_cast<VPRecipeBase>(Val: U);
653 return R && classof(R);
654 }
655
656 VPSingleDefRecipe *clone() override = 0;
657
658 /// Returns the underlying instruction.
659 Instruction *getUnderlyingInstr() {
660 return cast<Instruction>(Val: getUnderlyingValue());
661 }
662 const Instruction *getUnderlyingInstr() const {
663 return cast<Instruction>(Val: getUnderlyingValue());
664 }
665
666#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
667 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
668 LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const;
669#endif
670};
671
672/// Class to record and manage LLVM IR flags.
673LLVM_PACKED_START
674class VPIRFlags {
675 enum class OperationType : unsigned char {
676 Cmp,
677 FCmp,
678 OverflowingBinOp,
679 Trunc,
680 DisjointOp,
681 PossiblyExactOp,
682 GEPOp,
683 FPMathOp,
684 NonNegOp,
685 ReductionOp,
686 Other
687 };
688
689public:
690 struct WrapFlagsTy {
691 char HasNUW : 1;
692 char HasNSW : 1;
693
694 WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
695 };
696
697 struct TruncFlagsTy {
698 char HasNUW : 1;
699 char HasNSW : 1;
700
701 TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
702 };
703
704 struct DisjointFlagsTy {
705 char IsDisjoint : 1;
706 DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
707 };
708
709 struct NonNegFlagsTy {
710 char NonNeg : 1;
711 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
712 };
713
714private:
715 struct ExactFlagsTy {
716 char IsExact : 1;
717 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
718 };
719 struct FastMathFlagsTy {
720 char AllowReassoc : 1;
721 char NoNaNs : 1;
722 char NoInfs : 1;
723 char NoSignedZeros : 1;
724 char AllowReciprocal : 1;
725 char AllowContract : 1;
726 char ApproxFunc : 1;
727
728 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
729 };
730 /// Holds both the predicate and fast-math flags for floating-point
731 /// comparisons.
732 struct FCmpFlagsTy {
733 uint8_t CmpPredStorage;
734 FastMathFlagsTy FMFs;
735 };
736 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
737 struct ReductionFlagsTy {
738 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
739 // additional kinds.
740 unsigned char Kind : 6;
741 // TODO: Derive order/in-loop from plan and remove here.
742 unsigned char IsOrdered : 1;
743 unsigned char IsInLoop : 1;
744 FastMathFlagsTy FMFs;
745
746 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
747 FastMathFlags FMFs)
748 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
749 IsInLoop(IsInLoop), FMFs(FMFs) {}
750 };
751
752 OperationType OpType;
753
754 union {
755 uint8_t CmpPredStorage;
756 WrapFlagsTy WrapFlags;
757 TruncFlagsTy TruncFlags;
758 DisjointFlagsTy DisjointFlags;
759 ExactFlagsTy ExactFlags;
760 uint8_t GEPFlagsStorage;
761 NonNegFlagsTy NonNegFlags;
762 FastMathFlagsTy FMFs;
763 FCmpFlagsTy FCmpFlags;
764 ReductionFlagsTy ReductionFlags;
765 uint8_t AllFlags[2];
766 };
767
768public:
769 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
770
771 VPIRFlags(Instruction &I) : VPIRFlags() {
772 if (auto *FCmp = dyn_cast<FCmpInst>(Val: &I)) {
773 OpType = OperationType::FCmp;
774 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage,
775 Value: FCmp->getPredicate());
776 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
777 FCmpFlags.FMFs = FCmp->getFastMathFlags();
778 } else if (auto *Op = dyn_cast<CmpInst>(Val: &I)) {
779 OpType = OperationType::Cmp;
780 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage,
781 Value: Op->getPredicate());
782 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
783 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(Val: &I)) {
784 OpType = OperationType::DisjointOp;
785 DisjointFlags.IsDisjoint = Op->isDisjoint();
786 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(Val: &I)) {
787 OpType = OperationType::OverflowingBinOp;
788 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
789 } else if (auto *Op = dyn_cast<TruncInst>(Val: &I)) {
790 OpType = OperationType::Trunc;
791 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
792 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(Val: &I)) {
793 OpType = OperationType::PossiblyExactOp;
794 ExactFlags.IsExact = Op->isExact();
795 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
796 OpType = OperationType::GEPOp;
797 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
798 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
799 "wrap flags truncated");
800 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) {
801 OpType = OperationType::NonNegOp;
802 NonNegFlags.NonNeg = PNNI->hasNonNeg();
803 } else if (auto *Op = dyn_cast<FPMathOperator>(Val: &I)) {
804 OpType = OperationType::FPMathOp;
805 FMFs = Op->getFastMathFlags();
806 }
807 }
808
809 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
810 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage, Value: Pred);
811 assert(getPredicate() == Pred && "predicate truncated");
812 }
813
814 VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
815 : OpType(OperationType::FCmp), AllFlags() {
816 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage, Value: Pred);
817 assert(getPredicate() == Pred && "predicate truncated");
818 FCmpFlags.FMFs = FMFs;
819 }
820
821 VPIRFlags(WrapFlagsTy WrapFlags)
822 : OpType(OperationType::OverflowingBinOp), AllFlags() {
823 this->WrapFlags = WrapFlags;
824 }
825
826 VPIRFlags(TruncFlagsTy TruncFlags)
827 : OpType(OperationType::Trunc), AllFlags() {
828 this->TruncFlags = TruncFlags;
829 }
830
831 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
832 this->FMFs = FMFs;
833 }
834
835 VPIRFlags(DisjointFlagsTy DisjointFlags)
836 : OpType(OperationType::DisjointOp), AllFlags() {
837 this->DisjointFlags = DisjointFlags;
838 }
839
840 VPIRFlags(NonNegFlagsTy NonNegFlags)
841 : OpType(OperationType::NonNegOp), AllFlags() {
842 this->NonNegFlags = NonNegFlags;
843 }
844
845 VPIRFlags(ExactFlagsTy ExactFlags)
846 : OpType(OperationType::PossiblyExactOp), AllFlags() {
847 this->ExactFlags = ExactFlags;
848 }
849
  /// Construct flags for a GEP; the no-wrap flags are stored in raw form.
  VPIRFlags(GEPNoWrapFlags GEPFlags)
      : OpType(OperationType::GEPOp), AllFlags() {
    GEPFlagsStorage = GEPFlags.getRaw();
  }
854
  /// Construct flags for a reduction operation from its recurrence kind,
  /// ordered/in-loop properties and fast-math flags.
  VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
      : OpType(OperationType::ReductionOp), AllFlags() {
    ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
  }
859
  /// Copy the operation type and both raw flag bytes from \p Other into this
  /// object. NOTE(review): \p Other is taken by non-const reference but is not
  /// modified here — presumably historical; confirm before tightening.
  void transferFlags(VPIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags[0] = Other.AllFlags[0];
    AllFlags[1] = Other.AllFlags[1];
  }
865
866 /// Only keep flags also present in \p Other. \p Other must have the same
867 /// OpType as the current object.
868 void intersectFlags(const VPIRFlags &Other);
869
870 /// Drop all poison-generating flags.
871 void dropPoisonGeneratingFlags() {
872 // NOTE: This needs to be kept in-sync with
873 // Instruction::dropPoisonGeneratingFlags.
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 WrapFlags.HasNUW = false;
877 WrapFlags.HasNSW = false;
878 break;
879 case OperationType::Trunc:
880 TruncFlags.HasNUW = false;
881 TruncFlags.HasNSW = false;
882 break;
883 case OperationType::DisjointOp:
884 DisjointFlags.IsDisjoint = false;
885 break;
886 case OperationType::PossiblyExactOp:
887 ExactFlags.IsExact = false;
888 break;
889 case OperationType::GEPOp:
890 GEPFlagsStorage = 0;
891 break;
892 case OperationType::FPMathOp:
893 case OperationType::FCmp:
894 case OperationType::ReductionOp:
895 getFMFsRef().NoNaNs = false;
896 getFMFsRef().NoInfs = false;
897 break;
898 case OperationType::NonNegOp:
899 NonNegFlags.NonNeg = false;
900 break;
901 case OperationType::Cmp:
902 case OperationType::Other:
903 break;
904 }
905 }
906
  /// Apply the IR flags to \p I. \p I must be an instruction of the kind
  /// matching OpType (e.g. a GetElementPtrInst for GEPOp); asserts otherwise
  /// via the casts below. Reduction ops must not call this.
  void applyFlags(Instruction &I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I.setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::Trunc:
      I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
      I.setHasNoSignedWrap(TruncFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(Val: &I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I.setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(Val: &I)->setNoWrapFlags(
          GEPNoWrapFlags::fromRaw(Flags: GEPFlagsStorage));
      break;
    case OperationType::FPMathOp:
    case OperationType::FCmp: {
      // Resolve the FMF storage once; FCmp stores its FMFs next to the
      // predicate, FPMathOp uses the plain FMFs member.
      const FastMathFlagsTy &F = getFMFsRef();
      I.setHasAllowReassoc(F.AllowReassoc);
      I.setHasNoNaNs(F.NoNaNs);
      I.setHasNoInfs(F.NoInfs);
      I.setHasNoSignedZeros(F.NoSignedZeros);
      I.setHasAllowReciprocal(F.AllowReciprocal);
      I.setHasAllowContract(F.AllowContract);
      I.setHasApproxFunc(F.ApproxFunc);
      break;
    }
    case OperationType::NonNegOp:
      I.setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::ReductionOp:
      llvm_unreachable("reduction ops should not use applyFlags");
    case OperationType::Cmp:
    case OperationType::Other:
      // Nothing to transfer: the Cmp predicate is materialized explicitly by
      // the recipe, and Other carries no flags.
      break;
    }
  }
950
951 CmpInst::Predicate getPredicate() const {
952 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
953 "recipe doesn't have a compare predicate");
954 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
955 : CmpPredStorage;
956 return Bitfield::get<CmpInst::PredicateField>(Packed: Storage);
957 }
958
959 void setPredicate(CmpInst::Predicate Pred) {
960 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
961 "recipe doesn't have a compare predicate");
962 if (OpType == OperationType::FCmp)
963 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage, Value: Pred);
964 else
965 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage, Value: Pred);
966 assert(getPredicate() == Pred && "predicate truncated");
967 }
968
  /// Return the GEP no-wrap flags, decoded from the raw storage byte.
  GEPNoWrapFlags getGEPNoWrapFlags() const {
    return GEPNoWrapFlags::fromRaw(Flags: GEPFlagsStorage);
  }
972
  /// Returns true if the recipe has a comparison predicate (integer or
  /// floating-point compare).
  bool hasPredicate() const {
    return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
  }

  /// Returns true if the recipe has fast-math flags (FP math ops, FP compares
  /// and reductions all carry FMFs, in different storage locations).
  bool hasFastMathFlags() const {
    return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
           OpType == OperationType::ReductionOp;
  }
983
984 LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const;
985
  /// Returns true if the recipe has non-negative flag.
  bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }

  /// Returns the value of the non-negative flag. May only be called when
  /// hasNonNegFlag() is true.
  bool isNonNeg() const {
    assert(OpType == OperationType::NonNegOp &&
           "recipe doesn't have a NNEG flag");
    return NonNegFlags.NonNeg;
  }
994
995 bool hasNoUnsignedWrap() const {
996 switch (OpType) {
997 case OperationType::OverflowingBinOp:
998 return WrapFlags.HasNUW;
999 case OperationType::Trunc:
1000 return TruncFlags.HasNUW;
1001 default:
1002 llvm_unreachable("recipe doesn't have a NUW flag");
1003 }
1004 }
1005
1006 bool hasNoSignedWrap() const {
1007 switch (OpType) {
1008 case OperationType::OverflowingBinOp:
1009 return WrapFlags.HasNSW;
1010 case OperationType::Trunc:
1011 return TruncFlags.HasNSW;
1012 default:
1013 llvm_unreachable("recipe doesn't have a NSW flag");
1014 }
1015 }
1016
1017 bool isDisjoint() const {
1018 assert(OpType == OperationType::DisjointOp &&
1019 "recipe cannot have a disjoing flag");
1020 return DisjointFlags.IsDisjoint;
1021 }
1022
  /// Return the recurrence kind of a reduction operation. May only be called
  /// for recipes modeling a ReductionOp.
  RecurKind getRecurKind() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return static_cast<RecurKind>(ReductionFlags.Kind);
  }

  /// Returns true if the reduction must be performed in order. May only be
  /// called for recipes modeling a ReductionOp.
  bool isReductionOrdered() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsOrdered;
  }

  /// Returns true if the reduction is performed in-loop. May only be called
  /// for recipes modeling a ReductionOp.
  bool isReductionInLoop() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsInLoop;
  }
1040
1041private:
1042 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1043 FastMathFlagsTy &getFMFsRef() {
1044 if (OpType == OperationType::FCmp)
1045 return FCmpFlags.FMFs;
1046 if (OpType == OperationType::ReductionOp)
1047 return ReductionFlags.FMFs;
1048 return FMFs;
1049 }
1050 const FastMathFlagsTy &getFMFsRef() const {
1051 if (OpType == OperationType::FCmp)
1052 return FCmpFlags.FMFs;
1053 if (OpType == OperationType::ReductionOp)
1054 return ReductionFlags.FMFs;
1055 return FMFs;
1056 }
1057
1058public:
1059 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1060 /// otherwise. Opcodes not supporting default flags include compares and
1061 /// ComputeReductionResult.
1062 static VPIRFlags getDefaultFlags(unsigned Opcode);
1063
1064#if !defined(NDEBUG)
1065 /// Returns true if the set flags are valid for \p Opcode.
1066 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1067
1068 /// Returns true if \p Opcode has its required flags set.
1069 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1070#endif
1071
1072#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1073 void printFlags(raw_ostream &O) const;
1074#endif
1075};
1076LLVM_PACKED_END
1077
1078static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1079
1080/// A pure-virtual common base class for recipes defining a single VPValue and
1081/// using IR flags.
1082struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
1083 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1084 const VPIRFlags &Flags,
1085 DebugLoc DL = DebugLoc::getUnknown())
1086 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1087
1088 static inline bool classof(const VPRecipeBase *R) {
1089 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1090 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1091 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1092 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1093 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1094 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1095 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1096 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1097 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1098 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1099 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1100 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1101 }
1102
1103 static inline bool classof(const VPUser *U) {
1104 auto *R = dyn_cast<VPRecipeBase>(Val: U);
1105 return R && classof(R);
1106 }
1107
1108 static inline bool classof(const VPValue *V) {
1109 auto *R = V->getDefiningRecipe();
1110 return R && classof(R);
1111 }
1112
1113 VPRecipeWithIRFlags *clone() override = 0;
1114
1115 static inline bool classof(const VPSingleDefRecipe *R) {
1116 return classof(R: static_cast<const VPRecipeBase *>(R));
1117 }
1118
1119 void execute(VPTransformState &State) override = 0;
1120
1121 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1122 InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
1123 VPCostContext &Ctx) const;
1124};
1125
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling. \p PartOpIdx is the index of the operand holding the
/// unroll part.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};
1137
1138/// Helper to manage IR metadata for recipes. It filters out metadata that
1139/// cannot be propagated.
1140class VPIRMetadata {
1141 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1142
1143public:
1144 VPIRMetadata() = default;
1145
1146 /// Adds metatadata that can be preserved from the original instruction
1147 /// \p I.
1148 VPIRMetadata(Instruction &I) { getMetadataToPropagate(Inst: &I, Metadata); }
1149
1150 /// Copy constructor for cloning.
1151 VPIRMetadata(const VPIRMetadata &Other) = default;
1152
1153 VPIRMetadata &operator=(const VPIRMetadata &Other) = default;
1154
1155 /// Add all metadata to \p I.
1156 void applyMetadata(Instruction &I) const;
1157
1158 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1159 /// already exists, it will be replaced. Otherwise, it will be added.
1160 void setMetadata(unsigned Kind, MDNode *Node) {
1161 auto It =
1162 llvm::find_if(Range&: Metadata, P: [Kind](const std::pair<unsigned, MDNode *> &P) {
1163 return P.first == Kind;
1164 });
1165 if (It != Metadata.end())
1166 It->second = Node;
1167 else
1168 Metadata.emplace_back(Args&: Kind, Args&: Node);
1169 }
1170
1171 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1172 /// nodes that are common to both.
1173 void intersect(const VPIRMetadata &MD);
1174
1175 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1176 MDNode *getMetadata(unsigned Kind) const {
1177 auto It =
1178 find_if(Range: Metadata, P: [Kind](const auto &P) { return P.first == Kind; });
1179 return It != Metadata.end() ? It->second : nullptr;
1180 }
1181
1182#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1183 /// Print metadata with node IDs.
1184 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1185#endif
1186};
1187
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex. Most VPInstruction
/// opcodes can take an optional mask. Masks may be assigned during
/// predication.
class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
                                        public VPIRMetadata {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    // Creates a mask where each lane is active (true) whilst the current
    // counter (first operand + index) is less than the second operand. i.e.
    // mask[i] = icmp ult (op0 + i), op1
    // The size of the mask returned is VF * Multiplier (UF, third op).
    ActiveLaneMask,
    ExplicitVectorLength,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    // Abstract instruction that compares two values and branches. This is
    // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
    BranchOnCount,
    BranchOnCond,
    // Branch with 2 boolean condition operands and 3 successors. If condition
    // 0 is true, branches to successor 0; if condition 1 is true, branches to
    // successor 1; otherwise branches to successor 2. Expanded after region
    // dissolution into: (1) an OR of the two conditions branching to
    // middle.split or successor 2, and (2) middle.split branching to successor
    // 0 or successor 1 based on condition 0.
    BranchOnTwoConds,
    Broadcast,
    /// Given operands of (the same) struct type, creates a struct of fixed-
    /// width vectors each containing a struct field of all operands. The
    /// number of operands matches the element count of every vector.
    BuildStructVector,
    /// Creates a fixed-width vector containing all operands. The number of
    /// operands matches the vector element count.
    BuildVector,
    /// Extracts all lanes from its (non-scalable) vector operand. This is an
    /// abstract VPInstruction whose single defined VPValue represents VF
    /// scalars extracted from a vector, to be replaced by VF ExtractElement
    /// VPInstructions.
    Unpack,
    /// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
    /// where one of (x,y) is loop invariant, and both x and y are integer type.
    ComputeAnyOfResult,
    ComputeReductionResult,
    // Extracts the last part of its operand. Removed during unrolling.
    ExtractLastPart,
    // Extracts the last lane of its vector operand, per part.
    ExtractLastLane,
    // Extracts the second-to-last lane from its operand or the second-to-last
    // part if it is scalar. In the latter case, the recipe will be removed
    // during unrolling.
    ExtractPenultimateElement,
    LogicalAnd, // Non-poison propagating logical And.
    LogicalOr,  // Non-poison propagating logical Or.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only or
    // for all lanes, depending on its uses).
    PtrAdd,
    // Add a vector offset in bytes (second operand) to a scalar base pointer
    // (first operand).
    WidePtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its
    // (boolean) vector operands is true. It produces the reduced value across
    // all unrolled iterations. Unrolling will add all copies of its original
    // operand as additional operands. AnyOf is poison-safe as all operands
    // will be frozen.
    AnyOf,
    // Calculates the first active lane index of the vector predicate operands.
    // It produces the lane index across all unrolled iterations. Unrolling will
    // add all copies of its original operand as additional operands.
    // Implemented with @llvm.experimental.cttz.elts, but returns the expected
    // result even with operands that are all zeroes.
    FirstActiveLane,
    // Calculates the last active lane index of the vector predicate operands.
    // The predicates must be prefix-masks (all 1s before all 0s). Used when
    // tail-folding to extract the correct live-out value from the last active
    // iteration. It produces the lane index across all unrolled iterations.
    // Unrolling will add all copies of its original operand as additional
    // operands.
    LastActiveLane,
    // Returns a reversed vector for the operand.
    Reverse,

    // The opcodes below are used for VPInstructionWithType.
    //
    /// Scale the first operand (vector step) by the second operand
    /// (scalar-step). Casts both operands to the result type if needed.
    WideIVStep,
    /// Start vector for reductions with 3 operands: the original start value,
    /// the identity value for the reduction and an integer indicating the
    /// scaling factor.
    ReductionStartVector,
    // Creates a step vector starting from 0 to VF with a step of 1.
    StepVector,
    /// Extracts a single lane (first operand) from a set of vector operands.
    /// The lane specifies an index into a vector formed by combining all vector
    /// operands (all operands after the first one).
    ExtractLane,
    /// Explicit user for the resume phi of the canonical induction in the main
    /// VPlan, used by the epilogue vector loop.
    ResumeForEpilogue,
    /// Extracts the last active lane from a set of vectors. The first operand
    /// is the default value if no lanes in the masks are active. Conceptually,
    /// this concatenates all data vectors (odd operands), concatenates all
    /// masks (even operands -- ignoring the default value), and returns the
    /// last active value from the combined data vector using the combined mask.
    ExtractLastActive,

    /// Returns the value for vscale.
    VScale,
    /// Compute the exiting value of a wide induction after vectorization, that
    /// is the value of the last lane of the induction increment (i.e. its
    /// backedge value). Has the wide induction recipe as operand.
    ExitingIVValue,
    MaskedCond,
    OpsEnd = MaskedCond,
  };

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Return the number of operands determined by the opcode of the
  /// VPInstruction, excluding mask. Returns -1u if the number of operands
  /// cannot be determined directly by the opcode.
  unsigned getNumOperandsForOpcode() const;

private:
  typedef unsigned char OpcodeTy;
  // Either an LLVM IR opcode or one of the extended opcodes from the enum
  // above; both fit in one byte.
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  std::string Name;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

  /// Returns true if the VPInstruction does not need masking.
  bool alwaysUnmasked() const {
    if (Opcode == VPInstruction::MaskedCond)
      return false;

    // For now only VPInstructions with underlying values use masks.
    // TODO: provide masks to VPInstructions w/o underlying values.
    if (!getUnderlyingValue())
      return true;

    return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
  }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
                DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");

  VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)

  VPInstruction *clone() override {
    // Passing *this twice copies both the VPIRFlags and VPIRMetadata bases.
    auto *New = new VPInstruction(Opcode, operands(), *this, *this,
                                  getDebugLoc(), Name);
    if (getUnderlyingValue())
      New->setUnderlyingValue(getUnderlyingInstr());
    return New;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::UncondBr:
    case Instruction::CondBr:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnTwoConds:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the VPInstruction has a mask operand.
  bool isMasked() const {
    unsigned NumOpsForOpcode = getNumOperandsForOpcode();
    // VPInstructions without a fixed number of operands cannot be masked.
    if (NumOpsForOpcode == -1u)
      return false;
    // One extra operand beyond the opcode's fixed count is the mask.
    return NumOpsForOpcode + 1 == getNumOperands();
  }

  /// Returns the number of operands, excluding the mask if the VPInstruction is
  /// masked.
  unsigned getNumOperandsWithoutMask() const {
    return getNumOperands() - isMasked();
  }

  /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
  void addMask(VPValue *Mask) {
    assert(!isMasked() && "recipe is already masked");
    if (alwaysUnmasked())
      return;
    // The mask is always appended as the last operand.
    addOperand(Operand: Mask);
  }

  /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
  /// VPInstructions.
  VPValue *getMask() const {
    return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
  }

  /// Returns an iterator range over the operands excluding the mask operand
  /// if present.
  iterator_range<operand_iterator> operandsWithoutMask() {
    return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
  }
  iterator_range<const_operand_iterator> operandsWithoutMask() const {
    return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
  }

  /// Returns true if the underlying opcode may read from or write to memory.
  bool opcodeMayReadOrWriteFromMemory() const;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }

  /// Set the symbolic name for the VPInstruction.
  void setName(StringRef NewName) { Name = NewName.str(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1483
/// A specialization of VPInstruction augmenting it with a dedicated result
/// type, to be used when the opcode and operands of the VPInstruction don't
/// directly determine the result type. Note that there is no separate recipe ID
/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
/// distinguished purely by the opcode.
class VPInstructionWithType : public VPInstruction {
  /// Scalar result type produced by the recipe.
  Type *ResultTy;

public:
  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
                        Type *ResultTy, const VPIRFlags &Flags = {},
                        const VPIRMetadata &Metadata = {},
                        DebugLoc DL = DebugLoc::getUnknown(),
                        const Twine &Name = "")
      : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
        ResultTy(ResultTy) {}

  static inline bool classof(const VPRecipeBase *R) {
    // VPInstructionWithType are VPInstructions with specific opcodes requiring
    // type information.
    if (R->isScalarCast())
      return true;
    auto *VPI = dyn_cast<VPInstruction>(Val: R);
    if (!VPI)
      return false;
    switch (VPI->getOpcode()) {
    case VPInstruction::WideIVStep:
    case VPInstruction::StepVector:
    case VPInstruction::VScale:
    case Instruction::Load:
      return true;
    default:
      return false;
    }
  }

  static inline bool classof(const VPUser *R) {
    return isa<VPInstructionWithType>(Val: cast<VPRecipeBase>(Val: R));
  }

  /// Clone the recipe, preserving flags, metadata, result type and the
  /// underlying value.
  VPInstruction *clone() override {
    auto *New =
        new VPInstructionWithType(getOpcode(), operands(), getResultType(),
                                  *this, *this, getDebugLoc(), getName());
    New->setUnderlyingValue(getUnderlyingValue());
    return New;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  Type *getResultType() const { return ResultTy; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1551
/// Helper type to provide functions to access incoming values and blocks for
/// phi-like recipes.
class VPPhiAccessors {
protected:
  /// Return a VPRecipeBase* to the current object.
  virtual const VPRecipeBase *getAsRecipe() const = 0;

public:
  virtual ~VPPhiAccessors() = default;

  /// Returns the incoming VPValue with index \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return getAsRecipe()->getOperand(N: Idx);
  }

  /// Returns the incoming block with index \p Idx.
  const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

  /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
  VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;

  /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
  /// block.
  void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;

  /// Returns the number of incoming values, also number of incoming blocks.
  /// Defaults to all operands of the recipe; subclasses may override.
  virtual unsigned getNumIncoming() const {
    return getAsRecipe()->getNumOperands();
  }

  /// Returns an iterator range over the incoming values.
  VPUser::const_operand_range incoming_values() const {
    return make_range(x: getAsRecipe()->op_begin(),
                      y: getAsRecipe()->op_begin() + getNumIncoming());
  }

  using const_incoming_blocks_range = iterator_range<mapped_iterator<
      detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;

  /// Returns an iterator range over the incoming blocks.
  const_incoming_blocks_range incoming_blocks() const {
    std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
      return getIncomingBlock(Idx);
    };
    return map_range(C: index_range(0, getNumIncoming()), F: GetBlock);
  }

  /// Returns an iterator range over pairs of incoming values and corresponding
  /// incoming blocks.
  detail::zippy<llvm::detail::zip_first, VPUser::const_operand_range,
                const_incoming_blocks_range>
  incoming_values_and_blocks() const {
    return zip_equal(t: incoming_values(), u: incoming_blocks());
  }

  /// Removes the incoming value for \p IncomingBlock, which must be a
  /// predecessor.
  void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
1616
/// A VPInstruction with PHI opcode, providing VPPhiAccessors to manipulate its
/// incoming values and blocks.
struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors {
  VPPhi(ArrayRef<VPValue *> Operands, const VPIRFlags &Flags, DebugLoc DL,
        const Twine &Name = "")
      : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}

  // A VPPhi is any VPInstruction whose opcode is PHI; there is no separate
  // recipe ID.
  static inline bool classof(const VPUser *U) {
    auto *VPI = dyn_cast<VPInstruction>(Val: U);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPValue *V) {
    auto *VPI = dyn_cast<VPInstruction>(Val: V);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPSingleDefRecipe *SDR) {
    auto *VPI = dyn_cast<VPInstruction>(Val: SDR);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  /// Clone the phi, preserving flags, debug location, name and underlying
  /// value.
  VPPhi *clone() override {
    auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
    PhiR->setUnderlyingValue(getUnderlyingValue());
    return PhiR;
  }

  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  const VPRecipeBase *getAsRecipe() const override { return this; }
};
1654
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {
  Instruction &I;

protected:
  /// VPIRInstruction::create() should be used to create VPIRInstructions, as
  /// subclasses may need to be created, e.g. VPIRPhi.
  VPIRInstruction(Instruction &I)
      : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}

public:
  ~VPIRInstruction() override = default;

  /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
  /// VPIRInstruction.
  LLVM_ABI_FOR_TEST static VPIRInstruction *create(Instruction &I);

  VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)

  /// Clone via create() so PHIs get the VPIRPhi subclass; operands are copied
  /// over explicitly since the protected constructor takes none.
  VPIRInstruction *clone() override {
    auto *R = create(I);
    for (auto *Op : operands())
      R->addOperand(Operand: Op);
    return R;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPIRInstruction.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  Instruction &getInstruction() const { return I; }

  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1716
1717/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1718/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1719/// allowed, and it is used to add a new incoming value for the single
1720/// predecessor VPBB.
1721struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
1722 public VPPhiAccessors {
1723 VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}
1724
1725 static inline bool classof(const VPRecipeBase *U) {
1726 auto *R = dyn_cast<VPIRInstruction>(Val: U);
1727 return R && isa<PHINode>(Val: R->getInstruction());
1728 }
1729
1730 static inline bool classof(const VPUser *U) {
1731 auto *R = dyn_cast<VPRecipeBase>(Val: U);
1732 return R && classof(U: R);
1733 }
1734
1735 PHINode &getIRPhi() { return cast<PHINode>(Val&: getInstruction()); }
1736
1737 void execute(VPTransformState &State) override;
1738
1739protected:
1740#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1741 /// Print the recipe.
1742 void printRecipe(raw_ostream &O, const Twine &Indent,
1743 VPSlotTracker &SlotTracker) const override;
1744#endif
1745
1746 const VPRecipeBase *getAsRecipe() const override { return this; }
1747};
1748
1749/// VPWidenRecipe is a recipe for producing a widened instruction using the
1750/// opcode and operands of the recipe. This recipe covers most of the
1751/// traditional vectorization cases where each recipe transforms into a
1752/// vectorized version of itself.
1753class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
1754 public VPIRMetadata {
1755 unsigned Opcode;
1756
1757public:
1758 VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
1759 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1760 DebugLoc DL = {})
1761 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1762 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1763 setUnderlyingValue(&I);
1764 }
1765
1766 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1767 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1768 DebugLoc DL = {})
1769 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1770 VPIRMetadata(Metadata), Opcode(Opcode) {}
1771
1772 ~VPWidenRecipe() override = default;
1773
1774 VPWidenRecipe *clone() override {
1775 if (auto *UV = getUnderlyingValue())
1776 return new VPWidenRecipe(*cast<Instruction>(Val: UV), operands(), *this, *this,
1777 getDebugLoc());
1778 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1779 }
1780
1781 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1782
1783 /// Produce a widened instruction using the opcode and operands of the recipe,
1784 /// processing State.VF elements.
1785 void execute(VPTransformState &State) override;
1786
1787 /// Return the cost of this VPWidenRecipe.
1788 InstructionCost computeCost(ElementCount VF,
1789 VPCostContext &Ctx) const override;
1790
1791 unsigned getOpcode() const { return Opcode; }
1792
1793protected:
1794#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1795 /// Print the recipe.
1796 void printRecipe(raw_ostream &O, const Twine &Indent,
1797 VPSlotTracker &SlotTracker) const override;
1798#endif
1799
1800 /// Returns true if the recipe only uses the first lane of operand \p Op.
1801 bool usesFirstLaneOnly(const VPValue *Op) const override {
1802 assert(is_contained(operands(), Op) &&
1803 "Op must be an operand of the recipe");
1804 return Opcode == Instruction::Select && Op == getOperand(N: 0) &&
1805 Op->isDefinedOutsideLoopRegions();
1806 }
1807};
1808
1809/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1810class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1811 /// Cast instruction opcode.
1812 Instruction::CastOps Opcode;
1813
1814 /// Result type for the cast.
1815 Type *ResultTy;
1816
1817public:
1818 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1819 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1820 const VPIRMetadata &Metadata = {},
1821 DebugLoc DL = DebugLoc::getUnknown())
1822 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1823 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1824 assert(flagsValidForOpcode(Opcode) &&
1825 "Set flags not supported for the provided opcode");
1826 assert(hasRequiredFlagsForOpcode(Opcode) &&
1827 "Opcode requires specific flags to be set");
1828 setUnderlyingValue(CI);
1829 }
1830
1831 ~VPWidenCastRecipe() override = default;
1832
1833 VPWidenCastRecipe *clone() override {
1834 return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy,
1835 cast_or_null<CastInst>(Val: getUnderlyingValue()),
1836 *this, *this, getDebugLoc());
1837 }
1838
1839 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1840
1841 /// Produce widened copies of the cast.
1842 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1843
1844 /// Return the cost of this VPWidenCastRecipe.
1845 LLVM_ABI_FOR_TEST InstructionCost
1846 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1847
1848 Instruction::CastOps getOpcode() const { return Opcode; }
1849
1850 /// Returns the result type of the cast.
1851 Type *getResultType() const { return ResultTy; }
1852
1853protected:
1854#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1855 /// Print the recipe.
1856 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1857 VPSlotTracker &SlotTracker) const override;
1858#endif
1859};
1860
1861/// A recipe for widening vector intrinsics.
1862class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1863 /// ID of the vector intrinsic to widen.
1864 Intrinsic::ID VectorIntrinsicID;
1865
1866 /// Scalar return type of the intrinsic.
1867 Type *ResultTy;
1868
1869 /// True if the intrinsic may read from memory.
1870 bool MayReadFromMemory;
1871
1872 /// True if the intrinsic may read write to memory.
1873 bool MayWriteToMemory;
1874
1875 /// True if the intrinsic may have side-effects.
1876 bool MayHaveSideEffects;
1877
1878public:
1879 VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
1880 ArrayRef<VPValue *> CallArguments, Type *Ty,
1881 const VPIRFlags &Flags = {},
1882 const VPIRMetadata &MD = {},
1883 DebugLoc DL = DebugLoc::getUnknown())
1884 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1885 Flags, DL),
1886 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1887 MayReadFromMemory(CI.mayReadFromMemory()),
1888 MayWriteToMemory(CI.mayWriteToMemory()),
1889 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1890 setUnderlyingValue(&CI);
1891 }
1892
1893 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
1894 ArrayRef<VPValue *> CallArguments, Type *Ty,
1895 const VPIRFlags &Flags = {},
1896 const VPIRMetadata &Metadata = {},
1897 DebugLoc DL = DebugLoc::getUnknown())
1898 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1899 Flags, DL),
1900 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1901 ResultTy(Ty) {
1902 LLVMContext &Ctx = Ty->getContext();
1903 AttributeSet Attrs = Intrinsic::getFnAttributes(C&: Ctx, id: VectorIntrinsicID);
1904 MemoryEffects ME = Attrs.getMemoryEffects();
1905 MayReadFromMemory = !ME.onlyWritesMemory();
1906 MayWriteToMemory = !ME.onlyReadsMemory();
1907 MayHaveSideEffects = MayWriteToMemory ||
1908 !Attrs.hasAttribute(Kind: Attribute::NoUnwind) ||
1909 !Attrs.hasAttribute(Kind: Attribute::WillReturn);
1910 }
1911
1912 ~VPWidenIntrinsicRecipe() override = default;
1913
1914 VPWidenIntrinsicRecipe *clone() override {
1915 if (Value *CI = getUnderlyingValue())
1916 return new VPWidenIntrinsicRecipe(*cast<CallInst>(Val: CI), VectorIntrinsicID,
1917 operands(), ResultTy, *this, *this,
1918 getDebugLoc());
1919 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1920 *this, *this, getDebugLoc());
1921 }
1922
1923 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1924
1925 /// Produce a widened version of the vector intrinsic.
1926 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1927
1928 /// Return the cost of this vector intrinsic.
1929 LLVM_ABI_FOR_TEST InstructionCost
1930 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1931
1932 /// Return the ID of the intrinsic.
1933 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1934
1935 /// Return the scalar return type of the intrinsic.
1936 Type *getResultType() const { return ResultTy; }
1937
1938 /// Return to name of the intrinsic as string.
1939 StringRef getIntrinsicName() const;
1940
1941 /// Returns true if the intrinsic may read from memory.
1942 bool mayReadFromMemory() const { return MayReadFromMemory; }
1943
1944 /// Returns true if the intrinsic may write to memory.
1945 bool mayWriteToMemory() const { return MayWriteToMemory; }
1946
1947 /// Returns true if the intrinsic may have side-effects.
1948 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1949
1950 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1951
1952protected:
1953#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1954 /// Print the recipe.
1955 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1956 VPSlotTracker &SlotTracker) const override;
1957#endif
1958};
1959
1960/// A recipe for widening Call instructions using library calls.
1961class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
1962 public VPIRMetadata {
1963 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1964 /// between a given VF and the chosen vectorized variant, so there will be a
1965 /// different VPlan for each VF with a valid variant.
1966 Function *Variant;
1967
1968public:
1969 VPWidenCallRecipe(Value *UV, Function *Variant,
1970 ArrayRef<VPValue *> CallArguments,
1971 const VPIRFlags &Flags = {},
1972 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1973 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
1974 DL),
1975 VPIRMetadata(Metadata), Variant(Variant) {
1976 setUnderlyingValue(UV);
1977 assert(
1978 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1979 "last operand must be the called function");
1980 }
1981
1982 ~VPWidenCallRecipe() override = default;
1983
1984 VPWidenCallRecipe *clone() override {
1985 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1986 *this, *this, getDebugLoc());
1987 }
1988
1989 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
1990
1991 /// Produce a widened version of the call instruction.
1992 void execute(VPTransformState &State) override;
1993
1994 /// Return the cost of this VPWidenCallRecipe.
1995 InstructionCost computeCost(ElementCount VF,
1996 VPCostContext &Ctx) const override;
1997
1998 Function *getCalledScalarFunction() const {
1999 return cast<Function>(Val: getOperand(N: getNumOperands() - 1)->getLiveInIRValue());
2000 }
2001
2002 operand_range args() { return drop_end(RangeOrContainer: operands()); }
2003 const_operand_range args() const { return drop_end(RangeOrContainer: operands()); }
2004
2005protected:
2006#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2007 /// Print the recipe.
2008 void printRecipe(raw_ostream &O, const Twine &Indent,
2009 VPSlotTracker &SlotTracker) const override;
2010#endif
2011};
2012
2013/// A recipe representing a sequence of load -> update -> store as part of
2014/// a histogram operation. This means there may be aliasing between vector
2015/// lanes, which is handled by the llvm.experimental.vector.histogram family
2016/// of intrinsics. The only update operations currently supported are
2017/// 'add' and 'sub' where the other term is loop-invariant.
2018class VPHistogramRecipe : public VPRecipeBase {
2019 /// Opcode of the update operation, currently either add or sub.
2020 unsigned Opcode;
2021
2022public:
2023 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2024 DebugLoc DL = DebugLoc::getUnknown())
2025 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2026 Opcode(Opcode) {}
2027
2028 ~VPHistogramRecipe() override = default;
2029
2030 VPHistogramRecipe *clone() override {
2031 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2032 }
2033
2034 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2035
2036 /// Produce a vectorized histogram operation.
2037 void execute(VPTransformState &State) override;
2038
2039 /// Return the cost of this VPHistogramRecipe.
2040 InstructionCost computeCost(ElementCount VF,
2041 VPCostContext &Ctx) const override;
2042
2043 unsigned getOpcode() const { return Opcode; }
2044
2045 /// Return the mask operand if one was provided, or a null pointer if all
2046 /// lanes should be executed unconditionally.
2047 VPValue *getMask() const {
2048 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
2049 }
2050
2051protected:
2052#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2053 /// Print the recipe
2054 void printRecipe(raw_ostream &O, const Twine &Indent,
2055 VPSlotTracker &SlotTracker) const override;
2056#endif
2057};
2058
2059/// A recipe for handling GEP instructions.
2060class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
2061 Type *SourceElementTy;
2062
2063 bool isPointerLoopInvariant() const {
2064 return getOperand(N: 0)->isDefinedOutsideLoopRegions();
2065 }
2066
2067 bool isIndexLoopInvariant(unsigned I) const {
2068 return getOperand(N: I + 1)->isDefinedOutsideLoopRegions();
2069 }
2070
2071public:
2072 VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
2073 const VPIRFlags &Flags = {},
2074 DebugLoc DL = DebugLoc::getUnknown())
2075 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2076 SourceElementTy(GEP->getSourceElementType()) {
2077 setUnderlyingValue(GEP);
2078 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
2079 (void)Metadata;
2080 getMetadataToPropagate(Inst: GEP, Metadata);
2081 assert(Metadata.empty() && "unexpected metadata on GEP");
2082 }
2083
2084 ~VPWidenGEPRecipe() override = default;
2085
2086 VPWidenGEPRecipe *clone() override {
2087 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()),
2088 operands(), *this, getDebugLoc());
2089 }
2090
2091 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2092
2093 /// This recipe generates a GEP instruction.
2094 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2095
2096 /// Generate the gep nodes.
2097 void execute(VPTransformState &State) override;
2098
2099 Type *getSourceElementType() const { return SourceElementTy; }
2100
2101 /// Return the cost of this VPWidenGEPRecipe.
2102 InstructionCost computeCost(ElementCount VF,
2103 VPCostContext &Ctx) const override {
2104 // TODO: Compute accurate cost after retiring the legacy cost model.
2105 return 0;
2106 }
2107
2108 /// Returns true if the recipe only uses the first lane of operand \p Op.
2109 bool usesFirstLaneOnly(const VPValue *Op) const override;
2110
2111protected:
2112#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2113 /// Print the recipe.
2114 void printRecipe(raw_ostream &O, const Twine &Indent,
2115 VPSlotTracker &SlotTracker) const override;
2116#endif
2117};
2118
2119/// A recipe to compute a pointer to the last element of each part of a widened
2120/// memory access for widened memory accesses of SourceElementTy. Used for
2121/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2122/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2123/// unroller otherwise.
2124class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags {
2125 Type *SourceElementTy;
2126
2127 /// The constant stride of the pointer computed by this recipe, expressed in
2128 /// units of SourceElementTy.
2129 int64_t Stride;
2130
2131public:
2132 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2133 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2134 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2135 GEPFlags, DL),
2136 SourceElementTy(SourceElementTy), Stride(Stride) {
2137 assert(Stride < 0 && "Stride must be negative");
2138 }
2139
2140 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2141
2142 Type *getSourceElementType() const { return SourceElementTy; }
2143 int64_t getStride() const { return Stride; }
2144 VPValue *getPointer() const { return getOperand(N: 0); }
2145 VPValue *getVFValue() const { return getOperand(N: 1); }
2146 VPValue *getOffset() const {
2147 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
2148 }
2149
2150 /// Adds the offset operand to the recipe.
2151 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2152 void materializeOffset(unsigned Part = 0);
2153
2154 void execute(VPTransformState &State) override;
2155
2156 bool usesFirstLaneOnly(const VPValue *Op) const override {
2157 assert(is_contained(operands(), Op) &&
2158 "Op must be an operand of the recipe");
2159 return true;
2160 }
2161
2162 /// Return the cost of this VPVectorPointerRecipe.
2163 InstructionCost computeCost(ElementCount VF,
2164 VPCostContext &Ctx) const override {
2165 // TODO: Compute accurate cost after retiring the legacy cost model.
2166 return 0;
2167 }
2168
2169 /// Returns true if the recipe only uses the first part of operand \p Op.
2170 bool usesFirstPartOnly(const VPValue *Op) const override {
2171 assert(is_contained(operands(), Op) &&
2172 "Op must be an operand of the recipe");
2173 assert(getNumOperands() <= 2 && "must have at most two operands");
2174 return true;
2175 }
2176
2177 VPVectorEndPointerRecipe *clone() override {
2178 auto *VEPR = new VPVectorEndPointerRecipe(
2179 getPointer(), getVFValue(), getSourceElementType(), getStride(),
2180 getGEPNoWrapFlags(), getDebugLoc());
2181 if (auto *Offset = getOffset())
2182 VEPR->addOperand(Operand: Offset);
2183 return VEPR;
2184 }
2185
2186protected:
2187#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2188 /// Print the recipe.
2189 void printRecipe(raw_ostream &O, const Twine &Indent,
2190 VPSlotTracker &SlotTracker) const override;
2191#endif
2192};
2193
2194/// A recipe to compute the pointers for widened memory accesses of \p
2195/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2196/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2197class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
2198 Type *SourceElementTy;
2199
2200public:
2201 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2202 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2203 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2204 SourceElementTy(SourceElementTy) {}
2205
2206 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2207
2208 VPValue *getOffset() {
2209 return getNumOperands() == 2 ? getOperand(N: 1) : nullptr;
2210 }
2211
2212 void execute(VPTransformState &State) override;
2213
2214 Type *getSourceElementType() const { return SourceElementTy; }
2215
2216 bool usesFirstLaneOnly(const VPValue *Op) const override {
2217 assert(is_contained(operands(), Op) &&
2218 "Op must be an operand of the recipe");
2219 return true;
2220 }
2221
2222 /// Returns true if the recipe only uses the first part of operand \p Op.
2223 bool usesFirstPartOnly(const VPValue *Op) const override {
2224 assert(is_contained(operands(), Op) &&
2225 "Op must be an operand of the recipe");
2226 assert(getNumOperands() <= 2 && "must have at most two operands");
2227 return true;
2228 }
2229
2230 VPVectorPointerRecipe *clone() override {
2231 auto *Clone = new VPVectorPointerRecipe(getOperand(N: 0), SourceElementTy,
2232 getGEPNoWrapFlags(), getDebugLoc());
2233 if (auto *Off = getOffset())
2234 Clone->addOperand(Operand: Off);
2235 return Clone;
2236 }
2237
2238 /// Return the cost of this VPHeaderPHIRecipe.
2239 InstructionCost computeCost(ElementCount VF,
2240 VPCostContext &Ctx) const override {
2241 // TODO: Compute accurate cost after retiring the legacy cost model.
2242 return 0;
2243 }
2244
2245protected:
2246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2247 /// Print the recipe.
2248 void printRecipe(raw_ostream &O, const Twine &Indent,
2249 VPSlotTracker &SlotTracker) const override;
2250#endif
2251};
2252
2253/// A pure virtual base class for all recipes modeling header phis, including
2254/// phis for first order recurrences, pointer inductions and reductions. The
2255/// start value is the first operand of the recipe and the incoming value from
2256/// the backedge is the second operand.
2257///
2258/// Inductions are modeled using the following sub-classes:
2259/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2260/// starting at a specified value (zero for the main vector loop, the resume
2261/// value for the epilogue vector loop) and stepping by 1. The induction
2262/// controls exiting of the vector loop by comparing against the vector trip
2263/// count. Produces a single scalar PHI for the induction value per
2264/// iteration.
2265/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2266/// floating point inductions with arbitrary start and step values. Produces
2267/// a vector PHI per-part.
2268/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2269/// pointer induction. Produces either a vector PHI per-part or scalar values
2270/// per-lane based on the canonical induction.
2271/// * VPFirstOrderRecurrencePHIRecipe
2272/// * VPReductionPHIRecipe
2273/// * VPActiveLaneMaskPHIRecipe
2274/// * VPEVLBasedIVPHIRecipe
2275class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
2276 public VPPhiAccessors {
2277protected:
2278 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2279 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2280 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2281
2282 const VPRecipeBase *getAsRecipe() const override { return this; }
2283
2284public:
2285 ~VPHeaderPHIRecipe() override = default;
2286
2287 /// Method to support type inquiry through isa, cast, and dyn_cast.
2288 static inline bool classof(const VPRecipeBase *R) {
2289 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2290 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2291 }
2292 static inline bool classof(const VPValue *V) {
2293 return isa<VPHeaderPHIRecipe>(Val: V->getDefiningRecipe());
2294 }
2295 static inline bool classof(const VPSingleDefRecipe *R) {
2296 return isa<VPHeaderPHIRecipe>(Val: static_cast<const VPRecipeBase *>(R));
2297 }
2298
2299 /// Generate the phi nodes.
2300 void execute(VPTransformState &State) override = 0;
2301
2302 /// Return the cost of this header phi recipe.
2303 InstructionCost computeCost(ElementCount VF,
2304 VPCostContext &Ctx) const override;
2305
2306 /// Returns the start value of the phi, if one is set.
2307 VPValue *getStartValue() {
2308 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2309 }
2310 VPValue *getStartValue() const {
2311 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2312 }
2313
2314 /// Update the start value of the recipe.
2315 void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }
2316
2317 /// Returns the incoming value from the loop backedge.
2318 virtual VPValue *getBackedgeValue() {
2319 return getOperand(N: 1);
2320 }
2321
2322 /// Update the incoming value from the loop backedge.
2323 void setBackedgeValue(VPValue *V) { setOperand(I: 1, New: V); }
2324
2325 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2326 /// to be a recipe.
2327 virtual VPRecipeBase &getBackedgeRecipe() {
2328 return *getBackedgeValue()->getDefiningRecipe();
2329 }
2330
2331protected:
2332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2333 /// Print the recipe.
2334 void printRecipe(raw_ostream &O, const Twine &Indent,
2335 VPSlotTracker &SlotTracker) const override = 0;
2336#endif
2337};
2338
2339/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2340/// VPWidenPointerInductionRecipe), providing shared functionality, including
2341/// retrieving the step value, induction descriptor and original phi node.
2342class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
2343 const InductionDescriptor &IndDesc;
2344
2345public:
2346 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2347 VPValue *Step, const InductionDescriptor &IndDesc,
2348 DebugLoc DL)
2349 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2350 addOperand(Operand: Step);
2351 }
2352
2353 static inline bool classof(const VPRecipeBase *R) {
2354 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2355 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2356 }
2357
2358 static inline bool classof(const VPValue *V) {
2359 auto *R = V->getDefiningRecipe();
2360 return R && classof(R);
2361 }
2362
2363 static inline bool classof(const VPSingleDefRecipe *R) {
2364 return classof(R: static_cast<const VPRecipeBase *>(R));
2365 }
2366
2367 void execute(VPTransformState &State) override = 0;
2368
2369 /// Returns the start value of the induction.
2370 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2371
2372 /// Returns the step value of the induction.
2373 VPValue *getStepValue() { return getOperand(N: 1); }
2374 const VPValue *getStepValue() const { return getOperand(N: 1); }
2375
2376 /// Update the step value of the recipe.
2377 void setStepValue(VPValue *V) { setOperand(I: 1, New: V); }
2378
2379 VPValue *getVFValue() { return getOperand(N: 2); }
2380 const VPValue *getVFValue() const { return getOperand(N: 2); }
2381
2382 /// Returns the number of incoming values, also number of incoming blocks.
2383 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2384 /// incoming value, its start value.
2385 unsigned getNumIncoming() const override { return 1; }
2386
2387 /// Returns the underlying PHINode if one exists, or null otherwise.
2388 PHINode *getPHINode() const {
2389 return cast_if_present<PHINode>(Val: getUnderlyingValue());
2390 }
2391
2392 /// Returns the induction descriptor for the recipe.
2393 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2394
2395 VPValue *getBackedgeValue() override {
2396 // TODO: All operands of base recipe must exist and be at same index in
2397 // derived recipe.
2398 llvm_unreachable(
2399 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2400 }
2401
2402 VPRecipeBase &getBackedgeRecipe() override {
2403 // TODO: All operands of base recipe must exist and be at same index in
2404 // derived recipe.
2405 llvm_unreachable(
2406 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2407 }
2408
2409 /// Returns true if the recipe only uses the first lane of operand \p Op.
2410 bool usesFirstLaneOnly(const VPValue *Op) const override {
2411 assert(is_contained(operands(), Op) &&
2412 "Op must be an operand of the recipe");
2413 // The recipe creates its own wide start value, so it only requests the
2414 // first lane of the operand.
2415 // TODO: Remove once creating the start value is modeled separately.
2416 return Op == getStartValue() || Op == getStepValue();
2417 }
2418};
2419
2420/// A recipe for handling phi nodes of integer and floating-point inductions,
2421/// producing their vector values. This is an abstract recipe and must be
2422/// converted to concrete recipes before executing.
2423class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
2424 public VPIRFlags {
2425 TruncInst *Trunc;
2426
2427 // If this recipe is unrolled it will have 2 additional operands.
2428 bool isUnrolled() const { return getNumOperands() == 5; }
2429
2430public:
2431 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2432 VPValue *VF, const InductionDescriptor &IndDesc,
2433 const VPIRFlags &Flags, DebugLoc DL)
2434 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2435 Start, Step, IndDesc, DL),
2436 VPIRFlags(Flags), Trunc(nullptr) {
2437 addOperand(Operand: VF);
2438 }
2439
2440 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2441 VPValue *VF, const InductionDescriptor &IndDesc,
2442 TruncInst *Trunc, const VPIRFlags &Flags,
2443 DebugLoc DL)
2444 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2445 Start, Step, IndDesc, DL),
2446 VPIRFlags(Flags), Trunc(Trunc) {
2447 addOperand(Operand: VF);
2448 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
2449 (void)Metadata;
2450 if (Trunc)
2451 getMetadataToPropagate(Inst: Trunc, Metadata);
2452 assert(Metadata.empty() && "unexpected metadata on Trunc");
2453 }
2454
2455 ~VPWidenIntOrFpInductionRecipe() override = default;
2456
2457 VPWidenIntOrFpInductionRecipe *clone() override {
2458 return new VPWidenIntOrFpInductionRecipe(
2459 getPHINode(), getStartValue(), getStepValue(), getVFValue(),
2460 getInductionDescriptor(), Trunc, *this, getDebugLoc());
2461 }
2462
2463 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2464
2465 void execute(VPTransformState &State) override {
2466 llvm_unreachable("cannot execute this recipe, should be expanded via "
2467 "expandVPWidenIntOrFpInductionRecipe");
2468 }
2469
2470 /// Returns the start value of the induction.
2471 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2472
2473 /// If the recipe has been unrolled, return the VPValue for the induction
2474 /// increment, otherwise return null.
2475 VPValue *getSplatVFValue() const {
2476 return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr;
2477 }
2478
2479 /// Returns the number of incoming values, also number of incoming blocks.
2480 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2481 /// incoming value, its start value.
2482 unsigned getNumIncoming() const override { return 1; }
2483
2484 /// Returns the first defined value as TruncInst, if it is one or nullptr
2485 /// otherwise.
2486 TruncInst *getTruncInst() { return Trunc; }
2487 const TruncInst *getTruncInst() const { return Trunc; }
2488
2489 /// Returns true if the induction is canonical, i.e. starting at 0 and
2490 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2491 /// same type as the canonical induction.
2492 bool isCanonical() const;
2493
2494 /// Returns the scalar type of the induction.
2495 Type *getScalarType() const {
2496 return Trunc ? Trunc->getType() : getStartValue()->getType();
2497 }
2498
2499 /// Returns the VPValue representing the value of this induction at
2500 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2501 /// take place.
2502 VPValue *getLastUnrolledPartOperand() {
2503 return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this;
2504 }
2505
2506protected:
2507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2508 /// Print the recipe.
2509 void printRecipe(raw_ostream &O, const Twine &Indent,
2510 VPSlotTracker &SlotTracker) const override;
2511#endif
2512};
2513
2514class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
2515public:
2516 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2517 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2518 /// VF*UF.
2519 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2520 VPValue *NumUnrolledElems,
2521 const InductionDescriptor &IndDesc, DebugLoc DL)
2522 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2523 Start, Step, IndDesc, DL) {
2524 addOperand(Operand: NumUnrolledElems);
2525 }
2526
2527 ~VPWidenPointerInductionRecipe() override = default;
2528
2529 VPWidenPointerInductionRecipe *clone() override {
2530 return new VPWidenPointerInductionRecipe(
2531 cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1),
2532 getOperand(N: 2), getInductionDescriptor(), getDebugLoc());
2533 }
2534
2535 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2536
2537 /// Generate vector values for the pointer induction.
2538 void execute(VPTransformState &State) override {
2539 llvm_unreachable("cannot execute this recipe, should be expanded via "
2540 "expandVPWidenPointerInduction");
2541 };
2542
2543 /// Returns true if only scalar values will be generated.
2544 bool onlyScalarsGenerated(bool IsScalable);
2545
2546protected:
2547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2548 /// Print the recipe.
2549 void printRecipe(raw_ostream &O, const Twine &Indent,
2550 VPSlotTracker &SlotTracker) const override;
2551#endif
2552};
2553
2554/// A recipe for widened phis. Incoming values are operands of the recipe and
2555/// their operand index corresponds to the incoming predecessor block. If the
2556/// recipe is placed in an entry block to a (non-replicate) region, it must have
2557/// exactly 2 incoming values, the first from the predecessor of the region and
2558/// the second from the exiting block of the region.
2559class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
2560 public VPPhiAccessors {
2561 /// Name to use for the generated IR instruction for the widened phi.
2562 std::string Name;
2563
2564public:
2565 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2566 /// debug location \p DL.
2567 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2568 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2569 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2570 Name(Name.str()) {
2571 if (Start)
2572 addOperand(Operand: Start);
2573 }
2574
2575 VPWidenPHIRecipe *clone() override {
2576 auto *C =
2577 new VPWidenPHIRecipe(cast_if_present<PHINode>(Val: getUnderlyingValue()),
2578 getOperand(N: 0), getDebugLoc(), Name);
2579 for (VPValue *Op : llvm::drop_begin(RangeOrContainer: operands()))
2580 C->addOperand(Operand: Op);
2581 return C;
2582 }
2583
2584 ~VPWidenPHIRecipe() override = default;
2585
2586 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2587
2588 /// Generate the phi/select nodes.
2589 void execute(VPTransformState &State) override;
2590
2591 /// Return the cost of this VPWidenPHIRecipe.
2592 InstructionCost computeCost(ElementCount VF,
2593 VPCostContext &Ctx) const override;
2594
2595protected:
2596#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2597 /// Print the recipe.
2598 void printRecipe(raw_ostream &O, const Twine &Indent,
2599 VPSlotTracker &SlotTracker) const override;
2600#endif
2601
2602 const VPRecipeBase *getAsRecipe() const override { return this; }
2603};
2604
2605/// A recipe for handling first-order recurrence phis. The start value is the
2606/// first operand of the recipe and the incoming value from the backedge is the
2607/// second operand.
2608struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2609 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start,
2610 VPValue &BackedgeValue)
2611 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2612 &Start) {
2613 addOperand(Operand: &BackedgeValue);
2614 }
2615
2616 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2617
2618 VPFirstOrderRecurrencePHIRecipe *clone() override {
2619 return new VPFirstOrderRecurrencePHIRecipe(
2620 cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0), *getOperand(N: 1));
2621 }
2622
2623 void execute(VPTransformState &State) override;
2624
2625 /// Return the cost of this first-order recurrence phi recipe.
2626 InstructionCost computeCost(ElementCount VF,
2627 VPCostContext &Ctx) const override;
2628
2629 /// Returns true if the recipe only uses the first lane of operand \p Op.
2630 bool usesFirstLaneOnly(const VPValue *Op) const override {
2631 assert(is_contained(operands(), Op) &&
2632 "Op must be an operand of the recipe");
2633 return Op == getStartValue();
2634 }
2635
2636protected:
2637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2638 /// Print the recipe.
2639 void printRecipe(raw_ostream &O, const Twine &Indent,
2640 VPSlotTracker &SlotTracker) const override;
2641#endif
2642};
2643
/// Possible variants of a reduction.

/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
struct RdxUnordered {
  unsigned VFScaleFactor;
};
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Construct the ReductionStyle corresponding to an \p InLoop and/or \p
/// Ordered reduction. \p ScaleFactor is only used for unordered reductions,
/// where it scales down the VF of the partial result.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  if (Ordered)
    return RdxOrdered{};
  if (InLoop)
    return RdxInLoop{};
  return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
}
2666
2667/// A recipe for handling reduction phis. The start value is the first operand
2668/// of the recipe and the incoming value from the backedge is the second
2669/// operand.
2670class VPReductionPHIRecipe : public VPHeaderPHIRecipe, public VPIRFlags {
2671 /// The recurrence kind of the reduction.
2672 const RecurKind Kind;
2673
2674 ReductionStyle Style;
2675
2676 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2677 /// patterns for argmin/argmax).
2678 /// TODO: Also support cases where the phi itself has a single use, but its
2679 /// compare has multiple uses.
2680 bool HasUsesOutsideReductionChain;
2681
2682public:
2683 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2684 VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
2685 VPValue &BackedgeValue, ReductionStyle Style,
2686 const VPIRFlags &Flags,
2687 bool HasUsesOutsideReductionChain = false)
2688 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2689 VPIRFlags(Flags), Kind(Kind), Style(Style),
2690 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2691 addOperand(Operand: &BackedgeValue);
2692 }
2693
2694 ~VPReductionPHIRecipe() override = default;
2695
2696 VPReductionPHIRecipe *clone() override {
2697 return new VPReductionPHIRecipe(
2698 dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), getRecurrenceKind(),
2699 *getOperand(N: 0), *getBackedgeValue(), Style, *this,
2700 HasUsesOutsideReductionChain);
2701 }
2702
2703 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2704
2705 /// Generate the phi/select nodes.
2706 void execute(VPTransformState &State) override;
2707
2708 /// Get the factor that the VF of this recipe's output should be scaled by, or
2709 /// 1 if it isn't scaled.
2710 unsigned getVFScaleFactor() const {
2711 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
2712 return Partial ? Partial->VFScaleFactor : 1;
2713 }
2714
2715 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2716 /// > 1.
2717 void setVFScaleFactor(unsigned ScaleFactor) {
2718 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2719 Style = RdxUnordered{.VFScaleFactor: ScaleFactor};
2720 }
2721
2722 /// Returns the number of incoming values, also number of incoming blocks.
2723 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2724 /// incoming value, its start value.
2725 unsigned getNumIncoming() const override { return 2; }
2726
2727 /// Returns the recurrence kind of the reduction.
2728 RecurKind getRecurrenceKind() const { return Kind; }
2729
2730 /// Returns true, if the phi is part of an ordered reduction.
2731 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); }
2732
2733 /// Returns true if the phi is part of an in-loop reduction.
2734 bool isInLoop() const {
2735 return std::holds_alternative<RdxInLoop>(v: Style) ||
2736 std::holds_alternative<RdxOrdered>(v: Style);
2737 }
2738
2739 /// Returns true if the reduction outputs a vector with a scaled down VF.
2740 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2741
2742 /// Returns true, if the phi is part of a multi-use reduction.
2743 bool hasUsesOutsideReductionChain() const {
2744 return HasUsesOutsideReductionChain;
2745 }
2746
2747 /// Returns true if the recipe only uses the first lane of operand \p Op.
2748 bool usesFirstLaneOnly(const VPValue *Op) const override {
2749 assert(is_contained(operands(), Op) &&
2750 "Op must be an operand of the recipe");
2751 return isOrdered() || isInLoop();
2752 }
2753
2754protected:
2755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2756 /// Print the recipe.
2757 void printRecipe(raw_ostream &O, const Twine &Indent,
2758 VPSlotTracker &SlotTracker) const override;
2759#endif
2760};
2761
2762/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2763/// instructions.
2764class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPRecipeWithIRFlags {
2765public:
2766 /// The blend operation is a User of the incoming values and of their
2767 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2768 /// be omitted (implied by passing an odd number of operands) in which case
2769 /// all other incoming values are merged into it.
2770 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands,
2771 const VPIRFlags &Flags, DebugLoc DL)
2772 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2773 assert(Operands.size() >= 2 && "Expected at least two operands!");
2774 setUnderlyingValue(Phi);
2775 }
2776
2777 VPBlendRecipe *clone() override {
2778 return new VPBlendRecipe(cast_or_null<PHINode>(Val: getUnderlyingValue()),
2779 operands(), *this, getDebugLoc());
2780 }
2781
2782 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2783
2784 /// A normalized blend is one that has an odd number of operands, whereby the
2785 /// first operand does not have an associated mask.
2786 bool isNormalized() const { return getNumOperands() % 2; }
2787
2788 /// Return the number of incoming values, taking into account when normalized
2789 /// the first incoming value will have no mask.
2790 unsigned getNumIncomingValues() const {
2791 return (getNumOperands() + isNormalized()) / 2;
2792 }
2793
2794 /// Return incoming value number \p Idx.
2795 VPValue *getIncomingValue(unsigned Idx) const {
2796 return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized());
2797 }
2798
2799 /// Return mask number \p Idx.
2800 VPValue *getMask(unsigned Idx) const {
2801 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2802 return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized());
2803 }
2804
2805 /// Set mask number \p Idx to \p V.
2806 void setMask(unsigned Idx, VPValue *V) {
2807 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2808 Idx == 0 ? setOperand(I: 1, New: V) : setOperand(I: Idx * 2 + !isNormalized(), New: V);
2809 }
2810
2811 void execute(VPTransformState &State) override {
2812 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2813 }
2814
2815 /// Return the cost of this VPWidenMemoryRecipe.
2816 InstructionCost computeCost(ElementCount VF,
2817 VPCostContext &Ctx) const override;
2818
2819 /// Returns true if the recipe only uses the first lane of operand \p Op.
2820 bool usesFirstLaneOnly(const VPValue *Op) const override {
2821 assert(is_contained(operands(), Op) &&
2822 "Op must be an operand of the recipe");
2823 // Recursing through Blend recipes only, must terminate at header phi's the
2824 // latest.
2825 return all_of(Range: users(),
2826 P: [this](VPUser *U) { return U->usesFirstLaneOnly(Op: this); });
2827 }
2828
2829protected:
2830#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2831 /// Print the recipe.
2832 void printRecipe(raw_ostream &O, const Twine &Indent,
2833 VPSlotTracker &SlotTracker) const override;
2834#endif
2835};
2836
2837/// A common base class for interleaved memory operations.
2838/// An Interleaved memory operation is a memory access method that combines
2839/// multiple strided loads/stores into a single wide load/store with shuffles.
2840/// The first operand is the start address. The optional operands are, in order,
2841/// the stored values and the mask.
2842class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
2843 public VPIRMetadata {
2844 const InterleaveGroup<Instruction> *IG;
2845
2846 /// Indicates if the interleave group is in a conditional block and requires a
2847 /// mask.
2848 bool HasMask = false;
2849
2850 /// Indicates if gaps between members of the group need to be masked out or if
2851 /// unusued gaps can be loaded speculatively.
2852 bool NeedsMaskForGaps = false;
2853
2854protected:
2855 VPInterleaveBase(const unsigned char SC,
2856 const InterleaveGroup<Instruction> *IG,
2857 ArrayRef<VPValue *> Operands,
2858 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2859 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2860 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2861 NeedsMaskForGaps(NeedsMaskForGaps) {
2862 // TODO: extend the masked interleaved-group support to reversed access.
2863 assert((!Mask || !IG->isReverse()) &&
2864 "Reversed masked interleave-group not supported.");
2865 if (StoredValues.empty()) {
2866 for (unsigned I = 0; I < IG->getFactor(); ++I)
2867 if (Instruction *Inst = IG->getMember(Index: I)) {
2868 assert(!Inst->getType()->isVoidTy() && "must have result");
2869 new VPRecipeValue(this, Inst);
2870 }
2871 } else {
2872 for (auto *SV : StoredValues)
2873 addOperand(Operand: SV);
2874 }
2875 if (Mask) {
2876 HasMask = true;
2877 addOperand(Operand: Mask);
2878 }
2879 }
2880
2881public:
2882 VPInterleaveBase *clone() override = 0;
2883
2884 static inline bool classof(const VPRecipeBase *R) {
2885 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2886 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2887 }
2888
2889 static inline bool classof(const VPUser *U) {
2890 auto *R = dyn_cast<VPRecipeBase>(Val: U);
2891 return R && classof(R);
2892 }
2893
2894 /// Return the address accessed by this recipe.
2895 VPValue *getAddr() const {
2896 return getOperand(N: 0); // Address is the 1st, mandatory operand.
2897 }
2898
2899 /// Return the mask used by this recipe. Note that a full mask is represented
2900 /// by a nullptr.
2901 VPValue *getMask() const {
2902 // Mask is optional and the last operand.
2903 return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr;
2904 }
2905
2906 /// Return true if the access needs a mask because of the gaps.
2907 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2908
2909 const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; }
2910
2911 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2912
2913 void execute(VPTransformState &State) override {
2914 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2915 }
2916
2917 /// Return the cost of this recipe.
2918 InstructionCost computeCost(ElementCount VF,
2919 VPCostContext &Ctx) const override;
2920
2921 /// Returns true if the recipe only uses the first lane of operand \p Op.
2922 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2923
2924 /// Returns the number of stored operands of this interleave group. Returns 0
2925 /// for load interleave groups.
2926 virtual unsigned getNumStoreOperands() const = 0;
2927
2928 /// Return the VPValues stored by this interleave group. If it is a load
2929 /// interleave group, return an empty ArrayRef.
2930 ArrayRef<VPValue *> getStoredValues() const {
2931 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2932 getNumStoreOperands()};
2933 }
2934};
2935
2936/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2937/// or stores into one wide load/store and shuffles. The first operand of a
2938/// VPInterleave recipe is the address, followed by the stored values, followed
2939/// by an optional mask.
2940class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2941public:
2942 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2943 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2944 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2945 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2946 Mask, NeedsMaskForGaps, MD, DL) {}
2947
2948 ~VPInterleaveRecipe() override = default;
2949
2950 VPInterleaveRecipe *clone() override {
2951 return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
2952 getStoredValues(), getMask(),
2953 needsMaskForGaps(), *this, getDebugLoc());
2954 }
2955
2956 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2957
2958 /// Generate the wide load or store, and shuffles.
2959 void execute(VPTransformState &State) override;
2960
2961 bool usesFirstLaneOnly(const VPValue *Op) const override {
2962 assert(is_contained(operands(), Op) &&
2963 "Op must be an operand of the recipe");
2964 return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op);
2965 }
2966
2967 unsigned getNumStoreOperands() const override {
2968 return getNumOperands() - (getMask() ? 2 : 1);
2969 }
2970
2971protected:
2972#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2973 /// Print the recipe.
2974 void printRecipe(raw_ostream &O, const Twine &Indent,
2975 VPSlotTracker &SlotTracker) const override;
2976#endif
2977};
2978
2979/// A recipe for interleaved memory operations with vector-predication
2980/// intrinsics. The first operand is the address, the second operand is the
2981/// explicit vector length. Stored values and mask are optional operands.
2982class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2983public:
2984 VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2985 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2986 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2987 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2988 R.getDebugLoc()) {
2989 assert(!getInterleaveGroup()->isReverse() &&
2990 "Reversed interleave-group with tail folding is not supported.");
2991 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2992 "supported for scalable vector.");
2993 }
2994
2995 ~VPInterleaveEVLRecipe() override = default;
2996
2997 VPInterleaveEVLRecipe *clone() override {
2998 llvm_unreachable("cloning not implemented yet");
2999 }
3000
3001 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3002
3003 /// The VPValue of the explicit vector length.
3004 VPValue *getEVL() const { return getOperand(N: 1); }
3005
3006 /// Generate the wide load or store, and shuffles.
3007 void execute(VPTransformState &State) override;
3008
3009 /// The recipe only uses the first lane of the address, and EVL operand.
3010 bool usesFirstLaneOnly(const VPValue *Op) const override {
3011 assert(is_contained(operands(), Op) &&
3012 "Op must be an operand of the recipe");
3013 return (Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op)) ||
3014 Op == getEVL();
3015 }
3016
3017 unsigned getNumStoreOperands() const override {
3018 return getNumOperands() - (getMask() ? 3 : 2);
3019 }
3020
3021protected:
3022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3023 /// Print the recipe.
3024 void printRecipe(raw_ostream &O, const Twine &Indent,
3025 VPSlotTracker &SlotTracker) const override;
3026#endif
3027};
3028
3029/// A recipe to represent inloop, ordered or partial reduction operations. It
3030/// performs a reduction on a vector operand into a scalar (vector in the case
3031/// of a partial reduction) value, and adds the result to a chain. The Operands
3032/// are {ChainOp, VecOp, [Condition]}.
3033class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
3034
3035 /// The recurrence kind for the reduction in question.
3036 RecurKind RdxKind;
3037 /// Whether the reduction is conditional.
3038 bool IsConditional = false;
3039 ReductionStyle Style;
3040
3041protected:
3042 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3043 FastMathFlags FMFs, Instruction *I,
3044 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3045 ReductionStyle Style, DebugLoc DL)
3046 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3047 Style(Style) {
3048 if (CondOp) {
3049 IsConditional = true;
3050 addOperand(Operand: CondOp);
3051 }
3052 setUnderlyingValue(I);
3053 }
3054
3055public:
3056 VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
3057 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3058 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3059 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3060 {ChainOp, VecOp}, CondOp, Style, DL) {}
3061
3062 VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
3063 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3064 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3065 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3066 {ChainOp, VecOp}, CondOp, Style, DL) {}
3067
3068 ~VPReductionRecipe() override = default;
3069
3070 VPReductionRecipe *clone() override {
3071 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3072 getUnderlyingInstr(), getChainOp(), getVecOp(),
3073 getCondOp(), Style, getDebugLoc());
3074 }
3075
3076 static inline bool classof(const VPRecipeBase *R) {
3077 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3078 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3079 }
3080
3081 static inline bool classof(const VPUser *U) {
3082 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3083 return R && classof(R);
3084 }
3085
3086 static inline bool classof(const VPValue *VPV) {
3087 const VPRecipeBase *R = VPV->getDefiningRecipe();
3088 return R && classof(R);
3089 }
3090
3091 static inline bool classof(const VPSingleDefRecipe *R) {
3092 return classof(R: static_cast<const VPRecipeBase *>(R));
3093 }
3094
3095 /// Generate the reduction in the loop.
3096 void execute(VPTransformState &State) override;
3097
3098 /// Return the cost of VPReductionRecipe.
3099 InstructionCost computeCost(ElementCount VF,
3100 VPCostContext &Ctx) const override;
3101
3102 /// Return the recurrence kind for the in-loop reduction.
3103 RecurKind getRecurrenceKind() const { return RdxKind; }
3104 /// Return true if the in-loop reduction is ordered.
3105 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); };
3106 /// Return true if the in-loop reduction is conditional.
3107 bool isConditional() const { return IsConditional; };
3108 /// Returns true if the reduction outputs a vector with a scaled down VF.
3109 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3110 /// Returns true if the reduction is in-loop.
3111 bool isInLoop() const {
3112 return std::holds_alternative<RdxInLoop>(v: Style) ||
3113 std::holds_alternative<RdxOrdered>(v: Style);
3114 }
3115 /// The VPValue of the scalar Chain being accumulated.
3116 VPValue *getChainOp() const { return getOperand(N: 0); }
3117 /// The VPValue of the vector value to be reduced.
3118 VPValue *getVecOp() const { return getOperand(N: 1); }
3119 /// The VPValue of the condition for the block.
3120 VPValue *getCondOp() const {
3121 return isConditional() ? getOperand(N: getNumOperands() - 1) : nullptr;
3122 }
3123 /// Get the factor that the VF of this recipe's output should be scaled by, or
3124 /// 1 if it isn't scaled.
3125 unsigned getVFScaleFactor() const {
3126 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
3127 return Partial ? Partial->VFScaleFactor : 1;
3128 }
3129
3130protected:
3131#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3132 /// Print the recipe.
3133 void printRecipe(raw_ostream &O, const Twine &Indent,
3134 VPSlotTracker &SlotTracker) const override;
3135#endif
3136};
3137
3138/// A recipe to represent inloop reduction operations with vector-predication
3139/// intrinsics, performing a reduction on a vector operand with the explicit
3140/// vector length (EVL) into a scalar value, and adding the result to a chain.
3141/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3142class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
3143public:
3144 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
3145 DebugLoc DL = DebugLoc::getUnknown())
3146 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3147 R.getFastMathFlags(),
3148 cast_or_null<Instruction>(Val: R.getUnderlyingValue()),
3149 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3150 getReductionStyle(/*InLoop=*/InLoop: true, Ordered: R.isOrdered(), ScaleFactor: 1),
3151 DL) {}
3152
3153 ~VPReductionEVLRecipe() override = default;
3154
3155 VPReductionEVLRecipe *clone() override {
3156 llvm_unreachable("cloning not implemented yet");
3157 }
3158
3159 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3160
3161 /// Generate the reduction in the loop
3162 void execute(VPTransformState &State) override;
3163
3164 /// The VPValue of the explicit vector length.
3165 VPValue *getEVL() const { return getOperand(N: 2); }
3166
3167 /// Returns true if the recipe only uses the first lane of operand \p Op.
3168 bool usesFirstLaneOnly(const VPValue *Op) const override {
3169 assert(is_contained(operands(), Op) &&
3170 "Op must be an operand of the recipe");
3171 return Op == getEVL();
3172 }
3173
3174protected:
3175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3176 /// Print the recipe.
3177 void printRecipe(raw_ostream &O, const Twine &Indent,
3178 VPSlotTracker &SlotTracker) const override;
3179#endif
3180};
3181
3182/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3183/// copies of the original scalar type, one per lane, instead of producing a
3184/// single copy of widened type for all lanes. If the instruction is known to be
3185/// a single scalar, only one copy, per lane zero, will be generated.
3186class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
3187 public VPIRMetadata {
3188 /// Indicator if only a single replica per lane is needed.
3189 bool IsSingleScalar;
3190
3191 /// Indicator if the replicas are also predicated.
3192 bool IsPredicated;
3193
3194public:
3195 VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
3196 bool IsSingleScalar, VPValue *Mask = nullptr,
3197 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3198 DebugLoc DL = DebugLoc::getUnknown())
3199 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3200 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3201 IsPredicated(Mask) {
3202 setUnderlyingValue(I);
3203 if (Mask)
3204 addOperand(Operand: Mask);
3205 }
3206
3207 ~VPReplicateRecipe() override = default;
3208
3209 VPReplicateRecipe *clone() override {
3210 auto *Copy = new VPReplicateRecipe(
3211 getUnderlyingInstr(), operands(), IsSingleScalar,
3212 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3213 Copy->transferFlags(Other&: *this);
3214 return Copy;
3215 }
3216
3217 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3218
3219 /// Generate replicas of the desired Ingredient. Replicas will be generated
3220 /// for all parts and lanes unless a specific part and lane are specified in
3221 /// the \p State.
3222 void execute(VPTransformState &State) override;
3223
3224 /// Return the cost of this VPReplicateRecipe.
3225 InstructionCost computeCost(ElementCount VF,
3226 VPCostContext &Ctx) const override;
3227
3228 bool isSingleScalar() const { return IsSingleScalar; }
3229
3230 bool isPredicated() const { return IsPredicated; }
3231
3232 /// Returns true if the recipe only uses the first lane of operand \p Op.
3233 bool usesFirstLaneOnly(const VPValue *Op) const override {
3234 assert(is_contained(operands(), Op) &&
3235 "Op must be an operand of the recipe");
3236 return isSingleScalar();
3237 }
3238
3239 /// Returns true if the recipe uses scalars of operand \p Op.
3240 bool usesScalars(const VPValue *Op) const override {
3241 assert(is_contained(operands(), Op) &&
3242 "Op must be an operand of the recipe");
3243 return true;
3244 }
3245
3246 /// Returns true if the recipe is used by a widened recipe via an intervening
3247 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3248 /// in a vector.
3249 bool shouldPack() const;
3250
3251 /// Return the mask of a predicated VPReplicateRecipe.
3252 VPValue *getMask() {
3253 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3254 return getOperand(N: getNumOperands() - 1);
3255 }
3256
3257 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3258
3259protected:
3260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3261 /// Print the recipe.
3262 void printRecipe(raw_ostream &O, const Twine &Indent,
3263 VPSlotTracker &SlotTracker) const override;
3264#endif
3265};
3266
3267/// A recipe for generating conditional branches on the bits of a mask.
3268class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase {
3269public:
3270 VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
3271 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3272
3273 VPBranchOnMaskRecipe *clone() override {
3274 return new VPBranchOnMaskRecipe(getOperand(N: 0), getDebugLoc());
3275 }
3276
3277 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3278
3279 /// Generate the extraction of the appropriate bit from the block mask and the
3280 /// conditional branch.
3281 void execute(VPTransformState &State) override;
3282
3283 /// Return the cost of this VPBranchOnMaskRecipe.
3284 InstructionCost computeCost(ElementCount VF,
3285 VPCostContext &Ctx) const override;
3286
3287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3288 /// Print the recipe.
3289 void printRecipe(raw_ostream &O, const Twine &Indent,
3290 VPSlotTracker &SlotTracker) const override {
3291 O << Indent << "BRANCH-ON-MASK ";
3292 printOperands(O, SlotTracker);
3293 }
3294#endif
3295
3296 /// Returns true if the recipe uses scalars of operand \p Op.
3297 bool usesScalars(const VPValue *Op) const override {
3298 assert(is_contained(operands(), Op) &&
3299 "Op must be an operand of the recipe");
3300 return true;
3301 }
3302};
3303
3304/// A recipe to combine multiple recipes into a single 'expression' recipe,
3305/// which should be considered a single entity for cost-modeling and transforms.
3306/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3307/// expression recipes, before execute. The individual expression recipes are
3308/// completely disconnected from the def-use graph of other recipes not part of
3309/// the expression. Def-use edges between pairs of expression recipes remain
3310/// intact, whereas every edge between an expression recipe and a recipe outside
3311/// the expression is elevated to connect the non-expression recipe with the
3312/// VPExpressionRecipe itself.
3313class VPExpressionRecipe : public VPSingleDefRecipe {
3314 /// Recipes included in this VPExpressionRecipe. This could contain
3315 /// duplicates.
3316 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3317
3318 /// Temporary VPValues used for external operands of the expression, i.e.
3319 /// operands not defined by recipes in the expression.
3320 SmallVector<VPValue *> LiveInPlaceholders;
3321
3322 enum class ExpressionTypes {
3323 /// Represents an inloop extended reduction operation, performing a
3324 /// reduction on an extended vector operand into a scalar value, and adding
3325 /// the result to a chain.
3326 ExtendedReduction,
3327 /// Represent an inloop multiply-accumulate reduction, multiplying the
3328 /// extended vector operands, performing a reduction.add on the result, and
3329 /// adding the scalar result to a chain.
3330 ExtMulAccReduction,
3331 /// Represent an inloop multiply-accumulate reduction, multiplying the
3332 /// vector operands, performing a reduction.add on the result, and adding
3333 /// the scalar result to a chain.
3334 MulAccReduction,
3335 /// Represent an inloop multiply-accumulate reduction, multiplying the
3336 /// extended vector operands, negating the multiplication, performing a
3337 /// reduction.add on the result, and adding the scalar result to a chain.
3338 ExtNegatedMulAccReduction,
3339 };
3340
3341 /// Type of the expression.
3342 ExpressionTypes ExpressionType;
3343
3344 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3345 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3346 /// in the expression) are replaced by temporary VPValues and the original
3347 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3348 /// as needed (excluding last) to ensure they are only used by other recipes
3349 /// in the expression.
3350 VPExpressionRecipe(ExpressionTypes ExpressionType,
3351 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3352
3353public:
3354 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
3355 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3356 VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
3357 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3358 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3359 VPWidenRecipe *Mul, VPReductionRecipe *Red)
3360 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3361 {Ext0, Ext1, Mul, Red}) {}
3362 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3363 VPWidenRecipe *Mul, VPWidenRecipe *Sub,
3364 VPReductionRecipe *Red)
3365 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3366 {Ext0, Ext1, Mul, Sub, Red}) {
3367 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3368 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3369 "Expected an add reduction");
3370 assert(getNumOperands() >= 3 && "Expected at least three operands");
3371 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(Val: getOperand(N: 2));
3372 assert(SubConst && SubConst->isZero() &&
3373 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3374 }
3375
3376 ~VPExpressionRecipe() override {
3377 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3378 for (auto *R : reverse(C&: ExpressionRecipes)) {
3379 if (ExpressionRecipesSeen.insert(Ptr: R).second)
3380 delete R;
3381 }
3382 for (VPValue *T : LiveInPlaceholders)
3383 delete T;
3384 }
3385
3386 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3387
3388 VPExpressionRecipe *clone() override {
3389 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3390 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3391 for (auto *R : ExpressionRecipes)
3392 NewExpressiondRecipes.push_back(Elt: R->clone());
3393 for (auto *New : NewExpressiondRecipes) {
3394 for (const auto &[Idx, Old] : enumerate(First&: ExpressionRecipes))
3395 New->replaceUsesOfWith(From: Old, To: NewExpressiondRecipes[Idx]);
3396 // Update placeholder operands in the cloned recipe to use the external
3397 // operands, to be internalized when the cloned expression is constructed.
3398 for (const auto &[Placeholder, OutsideOp] :
3399 zip(t&: LiveInPlaceholders, u: operands()))
3400 New->replaceUsesOfWith(From: Placeholder, To: OutsideOp);
3401 }
3402 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3403 }
3404
3405 /// Return the VPValue to use to infer the result type of the recipe.
3406 VPValue *getOperandOfResultType() const {
3407 unsigned OpIdx =
3408 cast<VPReductionRecipe>(Val: ExpressionRecipes.back())->isConditional() ? 2
3409 : 1;
3410 return getOperand(N: getNumOperands() - OpIdx);
3411 }
3412
3413 /// Insert the recipes of the expression back into the VPlan, directly before
3414 /// the current recipe. Leaves the expression recipe empty, which must be
3415 /// removed before codegen.
3416 void decompose();
3417
3418 unsigned getVFScaleFactor() const {
3419 auto *PR = dyn_cast<VPReductionRecipe>(Val: ExpressionRecipes.back());
3420 return PR ? PR->getVFScaleFactor() : 1;
3421 }
3422
3423 /// Method for generating code, must not be called as this recipe is abstract.
3424 void execute(VPTransformState &State) override {
3425 llvm_unreachable("recipe must be removed before execute");
3426 }
3427
3428 InstructionCost computeCost(ElementCount VF,
3429 VPCostContext &Ctx) const override;
3430
3431 /// Returns true if this expression contains recipes that may read from or
3432 /// write to memory.
3433 bool mayReadOrWriteMemory() const;
3434
3435 /// Returns true if this expression contains recipes that may have side
3436 /// effects.
3437 bool mayHaveSideEffects() const;
3438
3439 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3440 bool isSingleScalar() const;
3441
3442protected:
3443#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3444 /// Print the recipe.
3445 void printRecipe(raw_ostream &O, const Twine &Indent,
3446 VPSlotTracker &SlotTracker) const override;
3447#endif
3448};
3449
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
      : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Clone this recipe, preserving the predicated-value operand and the debug
  /// location.
  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(N: 0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)

  /// Generates phi nodes for live-outs (from a replicate region) as needed to
  /// retain SSA form.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPPredInstPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3494
3495/// A common base class for widening memory operations. An optional mask can be
3496/// provided as the last operand.
3497class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
3498 public VPIRMetadata {
3499protected:
3500 Instruction &Ingredient;
3501
3502 /// Alignment information for this memory access.
3503 Align Alignment;
3504
3505 /// Whether the accessed addresses are consecutive.
3506 bool Consecutive;
3507
3508 /// Whether the consecutive accessed addresses are in reverse order.
3509 bool Reverse;
3510
3511 /// Whether the memory access is masked.
3512 bool IsMasked = false;
3513
3514 void setMask(VPValue *Mask) {
3515 assert(!IsMasked && "cannot re-set mask");
3516 if (!Mask)
3517 return;
3518 addOperand(Operand: Mask);
3519 IsMasked = true;
3520 }
3521
3522 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3523 std::initializer_list<VPValue *> Operands,
3524 bool Consecutive, bool Reverse,
3525 const VPIRMetadata &Metadata, DebugLoc DL)
3526 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3527 Alignment(getLoadStoreAlignment(I: &I)), Consecutive(Consecutive),
3528 Reverse(Reverse) {
3529 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3530 assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
3531 "Reversed acccess without VPVectorEndPointerRecipe address?");
3532 }
3533
3534public:
3535 VPWidenMemoryRecipe *clone() override {
3536 llvm_unreachable("cloning not supported");
3537 }
3538
3539 static inline bool classof(const VPRecipeBase *R) {
3540 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3541 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3542 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3543 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3544 }
3545
3546 static inline bool classof(const VPUser *U) {
3547 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3548 return R && classof(R);
3549 }
3550
3551 /// Return whether the loaded-from / stored-to addresses are consecutive.
3552 bool isConsecutive() const { return Consecutive; }
3553
3554 /// Return whether the consecutive loaded/stored addresses are in reverse
3555 /// order.
3556 bool isReverse() const { return Reverse; }
3557
3558 /// Return the address accessed by this recipe.
3559 VPValue *getAddr() const { return getOperand(N: 0); }
3560
3561 /// Returns true if the recipe is masked.
3562 bool isMasked() const { return IsMasked; }
3563
3564 /// Return the mask used by this recipe. Note that a full mask is represented
3565 /// by a nullptr.
3566 VPValue *getMask() const {
3567 // Mask is optional and therefore the last operand.
3568 return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
3569 }
3570
3571 /// Returns the alignment of the memory access.
3572 Align getAlign() const { return Alignment; }
3573
3574 /// Generate the wide load/store.
3575 void execute(VPTransformState &State) override {
3576 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3577 }
3578
3579 /// Return the cost of this VPWidenMemoryRecipe.
3580 InstructionCost computeCost(ElementCount VF,
3581 VPCostContext &Ctx) const override;
3582
3583 Instruction &getIngredient() const { return Ingredient; }
3584};
3585
3586/// A recipe for widening load operations, using the address to load from and an
3587/// optional mask.
3588struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
3589 public VPRecipeValue {
3590 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
3591 bool Consecutive, bool Reverse,
3592 const VPIRMetadata &Metadata, DebugLoc DL)
3593 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3594 Consecutive, Reverse, Metadata, DL),
3595 VPRecipeValue(this, &Load) {
3596 setMask(Mask);
3597 }
3598
3599 VPWidenLoadRecipe *clone() override {
3600 return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(),
3601 getMask(), Consecutive, Reverse, *this,
3602 getDebugLoc());
3603 }
3604
3605 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3606
3607 /// Generate a wide load or gather.
3608 void execute(VPTransformState &State) override;
3609
3610 /// Returns true if the recipe only uses the first lane of operand \p Op.
3611 bool usesFirstLaneOnly(const VPValue *Op) const override {
3612 assert(is_contained(operands(), Op) &&
3613 "Op must be an operand of the recipe");
3614 // Widened, consecutive loads operations only demand the first lane of
3615 // their address.
3616 return Op == getAddr() && isConsecutive();
3617 }
3618
3619protected:
3620#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3621 /// Print the recipe.
3622 void printRecipe(raw_ostream &O, const Twine &Indent,
3623 VPSlotTracker &SlotTracker) const override;
3624#endif
3625};
3626
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask. Operands are {Addr, EVL[, Mask]}.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe,
                                    public VPRecipeValue {
  /// Construct from an existing VPWidenLoadRecipe \p L, inheriting its
  /// ingredient, consecutive/reverse flags, metadata and debug location.
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
                       VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
                            L.getDebugLoc()),
        VPRecipeValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(N: 1); }

  /// Generate the wide load or gather.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenLoadEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3669
3670/// A recipe for widening store operations, using the stored value, the address
3671/// to store to and an optional mask.
3672struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
3673 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3674 VPValue *Mask, bool Consecutive, bool Reverse,
3675 const VPIRMetadata &Metadata, DebugLoc DL)
3676 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3677 {Addr, StoredVal}, Consecutive, Reverse, Metadata,
3678 DL) {
3679 setMask(Mask);
3680 }
3681
3682 VPWidenStoreRecipe *clone() override {
3683 return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(),
3684 getStoredValue(), getMask(), Consecutive,
3685 Reverse, *this, getDebugLoc());
3686 }
3687
3688 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3689
3690 /// Return the value stored by this recipe.
3691 VPValue *getStoredValue() const { return getOperand(N: 1); }
3692
3693 /// Generate a wide store or scatter.
3694 void execute(VPTransformState &State) override;
3695
3696 /// Returns true if the recipe only uses the first lane of operand \p Op.
3697 bool usesFirstLaneOnly(const VPValue *Op) const override {
3698 assert(is_contained(operands(), Op) &&
3699 "Op must be an operand of the recipe");
3700 // Widened, consecutive stores only demand the first lane of their address,
3701 // unless the same operand is also stored.
3702 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3703 }
3704
3705protected:
3706#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3707 /// Print the recipe.
3708 void printRecipe(raw_ostream &O, const Twine &Indent,
3709 VPSlotTracker &SlotTracker) const override;
3710#endif
3711};
3712
/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask. Operands are {Addr, StoredVal, EVL[, Mask]}.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  /// Construct from an existing VPWidenStoreRecipe \p S, inheriting its
  /// ingredient, consecutive/reverse flags, metadata and debug location.
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
                            {Addr, StoredVal, &EVL}, S.isConsecutive(),
                            S.isReverse(), S, S.getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(N: 1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(N: 2); }

  /// Generate the wide store or scatter.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenStoreEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3761
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  /// The SCEV expression to expand. Expansion must happen before final
  /// execute (see execute below).
  const SCEV *Expr;

public:
  VPExpandSCEVRecipe(const SCEV *Expr)
      : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}

  ~VPExpandSCEVRecipe() override = default;

  VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }

  VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("SCEV expressions must be expanded before final execute");
  }

  /// Return the cost of this VPExpandSCEVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Return the SCEV expression this recipe expands.
  const SCEV *getSCEV() const { return Expr; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3796
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
                          DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  /// Clone this recipe, copying both the start value and the backedge value
  /// operands.
  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getStartValue(), getDebugLoc());
    R->addOperand(Operand: getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Returns the start value of the canonical induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }

  /// Returns the scalar type of the induction.
  Type *getScalarType() const { return getStartValue()->getType(); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPCanonicalIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3856
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
                          StartMask, DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  /// Clone this recipe, copying the second (backedge) operand if one has been
  /// added already.
  VPActiveLaneMaskPHIRecipe *clone() override {
    auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc());
    if (getNumOperands() == 2)
      R->addOperand(Operand: getOperand(N: 1));
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3886
/// A recipe for generating the phi node tracking the current scalar iteration
/// index. It starts at the start value of the canonical induction and gets
/// incremented by the number of scalar iterations processed by the vector loop
/// iteration. The increment does not have to be loop invariant.
class VPCurrentIterationPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
                          StartIV, DL) {}

  ~VPCurrentIterationPHIRecipe() override = default;

  VPCurrentIterationPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Return the cost of this VPCurrentIterationPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3931
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                 public VPUnrollPartAccessor<1> {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0)));
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCanonicalIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3967
3968/// A recipe for converting the input value \p IV value to the corresponding
3969/// value of an IV with different start and step values, using Start + IV *
3970/// Step.
3971class VPDerivedIVRecipe : public VPSingleDefRecipe {
3972 /// Kind of the induction.
3973 const InductionDescriptor::InductionKind Kind;
3974 /// If not nullptr, the floating point induction binary operator. Must be set
3975 /// for floating point inductions.
3976 const FPMathOperator *FPBinOp;
3977
3978 /// Name to use for the generated IR instruction for the derived IV.
3979 std::string Name;
3980
3981public:
3982 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start,
3983 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3984 const Twine &Name = "")
3985 : VPDerivedIVRecipe(
3986 IndDesc.getKind(),
3987 dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()),
3988 Start, CanonicalIV, Step, Name) {}
3989
3990 VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
3991 const FPMathOperator *FPBinOp, VPIRValue *Start,
3992 VPValue *IV, VPValue *Step, const Twine &Name = "")
3993 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3994 Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}
3995
3996 ~VPDerivedIVRecipe() override = default;
3997
3998 VPDerivedIVRecipe *clone() override {
3999 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1),
4000 getStepValue());
4001 }
4002
4003 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4004
4005 /// Generate the transformed value of the induction at offset StartValue (1.
4006 /// operand) + IV (2. operand) * StepValue (3, operand).
4007 void execute(VPTransformState &State) override;
4008
4009 /// Return the cost of this VPDerivedIVRecipe.
4010 InstructionCost computeCost(ElementCount VF,
4011 VPCostContext &Ctx) const override {
4012 // TODO: Compute accurate cost after retiring the legacy cost model.
4013 return 0;
4014 }
4015
4016 Type *getScalarType() const { return getStartValue()->getType(); }
4017
4018 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
4019 VPValue *getStepValue() const { return getOperand(N: 2); }
4020
4021 /// Returns true if the recipe only uses the first lane of operand \p Op.
4022 bool usesFirstLaneOnly(const VPValue *Op) const override {
4023 assert(is_contained(operands(), Op) &&
4024 "Op must be an operand of the recipe");
4025 return true;
4026 }
4027
4028protected:
4029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4030 /// Print the recipe.
4031 void printRecipe(raw_ostream &O, const Twine &Indent,
4032 VPSlotTracker &SlotTracker) const override;
4033#endif
4034};
4035
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values. Before unrolling by UF the recipe represents
/// the VF*UF scalar values to be produced, or UF scalar values if only first
/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
/// operand StartIndex to all unroll parts except part 0, as the recipe
/// represents the VF scalar values (this number of values is taken from
/// State.VF rather than from the VF operand) starting at IV + StartIndex.
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  /// Binary opcode used to advance the induction (e.g. add for integer IVs).
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs,
                        DebugLoc DL)
      : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
                            FMFs, DL),
        InductionOpcode(Opcode) {}

  /// Convenience constructor deriving opcode and fast-math flags from the
  /// induction descriptor.
  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step, VPValue *VF,
                        DebugLoc DL = DebugLoc::getUnknown())
      : VPScalarIVStepsRecipe(
            IV, Step, VF, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags(),
            DL) {}

  ~VPScalarIVStepsRecipe() override = default;

  /// Clone this recipe, including the optional StartIndex operand if present.
  VPScalarIVStepsRecipe *clone() override {
    auto *NewR = new VPScalarIVStepsRecipe(getOperand(N: 0), getOperand(N: 1),
                                           getOperand(N: 2), InductionOpcode,
                                           getFastMathFlags(), getDebugLoc());
    if (VPValue *StartIndex = getStartIndex())
      NewR->setStartIndex(StartIndex);
    return NewR;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their users.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPScalarIVStepsRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  VPValue *getStepValue() const { return getOperand(N: 1); }

  /// Return the number of scalars to produce per unroll part, used to compute
  /// StartIndex during unrolling.
  VPValue *getVFValue() const { return getOperand(N: 2); }

  /// Return the StartIndex, or null if known to be zero, valid only after
  /// unrolling.
  VPValue *getStartIndex() const {
    return getNumOperands() == 4 ? getOperand(N: 3) : nullptr;
  }

  /// Set or add the StartIndex operand.
  void setStartIndex(VPValue *StartIndex) {
    if (getNumOperands() == 4)
      setOperand(I: 3, New: StartIndex);
    else
      addOperand(Operand: StartIndex);
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the binary opcode used to advance the induction.
  Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
4123
/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
/// types implementing VPPhiAccessors. Used by isa<> & co.
template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
  static inline bool isPossible(const VPRecipeBase *f) {
    // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
    return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(Val: f);
  }
};
4132/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
4133/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
4134template <typename SrcTy>
4135struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
4136
4137 using Self = CastInfo<VPPhiAccessors, SrcTy>;
4138
4139 /// doCast is used by cast<>.
4140 static inline VPPhiAccessors *doCast(SrcTy R) {
4141 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
4142 switch (R->getVPRecipeID()) {
4143 case VPRecipeBase::VPInstructionSC:
4144 return cast<VPPhi>(R);
4145 case VPRecipeBase::VPIRInstructionSC:
4146 return cast<VPIRPhi>(R);
4147 case VPRecipeBase::VPWidenPHISC:
4148 return cast<VPWidenPHIRecipe>(R);
4149 default:
4150 return cast<VPHeaderPHIRecipe>(R);
4151 }
4152 }());
4153 }
4154
4155 /// doCastIfPossible is used by dyn_cast<>.
4156 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
4157 if (!Self::isPossible(f))
4158 return nullptr;
4159 return doCast(R: f);
4160 }
4161};
/// Enable cast<>/dyn_cast<> from non-const VPRecipeBase pointers to
/// VPPhiAccessors.
template <>
struct CastInfo<VPPhiAccessors, VPRecipeBase *>
    : CastInfoVPPhiAccessors<VPRecipeBase *> {};
/// Enable cast<>/dyn_cast<> from const VPRecipeBase pointers to
/// VPPhiAccessors.
template <>
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
    : CastInfoVPPhiAccessors<const VPRecipeBase *> {};
4168
4169/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
4170/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
namespace detail {
/// Down-cast \p R to the concrete recipe type implementing VPIRMetadata by
/// dispatching on its recipe ID. \p R must be one of the recipe types handled
/// below; any other ID is unreachable. Kept in namespace detail so it can be
/// shared by the const and non-const CastInfo specializations.
template <typename DstTy, typename RecipeBasePtrTy>
static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
  switch (R->getVPRecipeID()) {
  case VPRecipeBase::VPInstructionSC:
    return cast<VPInstruction>(R);
  case VPRecipeBase::VPWidenSC:
    return cast<VPWidenRecipe>(R);
  case VPRecipeBase::VPWidenCastSC:
    return cast<VPWidenCastRecipe>(R);
  case VPRecipeBase::VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(R);
  case VPRecipeBase::VPWidenCallSC:
    return cast<VPWidenCallRecipe>(R);
  case VPRecipeBase::VPReplicateSC:
    return cast<VPReplicateRecipe>(R);
  // Both interleave variants share the VPInterleaveBase down-cast.
  case VPRecipeBase::VPInterleaveSC:
  case VPRecipeBase::VPInterleaveEVLSC:
    return cast<VPInterleaveBase>(R);
  // All widened memory variants share the VPWidenMemoryRecipe down-cast.
  case VPRecipeBase::VPWidenLoadSC:
  case VPRecipeBase::VPWidenLoadEVLSC:
  case VPRecipeBase::VPWidenStoreSC:
  case VPRecipeBase::VPWidenStoreEVLSC:
    return cast<VPWidenMemoryRecipe>(R);
  default:
    llvm_unreachable("invalid recipe for VPIRMetadata cast");
  }
}
} // namespace detail
4200
4201/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
4202/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
template <typename DstTy, typename SrcTy>
struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
  /// Returns true if \p R is one of the recipe types inheriting from
  /// VPIRMetadata.
  static inline bool isPossible(SrcTy R) {
    // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
    // also handled in castToVPIRMetadata.
    return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
               VPWidenIntrinsicRecipe, VPWidenCallRecipe, VPReplicateRecipe,
               VPInterleaveRecipe, VPInterleaveEVLRecipe, VPWidenLoadRecipe,
               VPWidenLoadEVLRecipe, VPWidenStoreRecipe, VPWidenStoreEVLRecipe>(
        R);
  }

  using RetTy = DstTy *;

  /// doCast is used by cast<>.
  static inline RetTy doCast(SrcTy R) {
    return detail::castToVPIRMetadata<RetTy, SrcTy>(R);
  }

  /// doCastIfPossible is used by dyn_cast<>. Returns nullptr if \p R does not
  /// inherit from VPIRMetadata.
  static inline RetTy doCastIfPossible(SrcTy R) {
    if (!isPossible(R))
      return nullptr;
    return doCast(R);
  }
};
/// Enable cast<>/dyn_cast<> from non-const VPRecipeBase pointers to
/// VPIRMetadata.
template <>
struct CastInfo<VPIRMetadata, VPRecipeBase *>
    : CastInfoVPIRMetadata<VPIRMetadata, VPRecipeBase *> {};
/// Enable cast<>/dyn_cast<> from const VPRecipeBase pointers to const
/// VPIRMetadata.
template <>
struct CastInfo<VPIRMetadata, const VPRecipeBase *>
    : CastInfoVPIRMetadata<const VPIRMetadata, const VPRecipeBase *> {};
4235
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
  friend class VPlan;

  /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

public:
  using RecipeListTy = iplist<VPRecipeBase>;

protected:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

  /// Constructor for sub-classes, which pass their own block sub-class ID.
  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
      : VPBlockBase(BlockSC, Name.str()) {}

public:
  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// Insert \p Recipe before \p InsertPt, taking ownership of it. \p Recipe
  /// must not already belong to a block.
  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(where: InsertPt, New: Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, InsertPt: end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this VPBasicBlock.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(x: begin(), y: getFirstNonPhi());
  }

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();
  const VPRegionBlock *getEnclosingLoopRegion() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe terminating
  /// the block. If there are no or only a single successor, return nullptr;
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

  /// Clone the current block and its recipes, without updating the operands of
  /// the cloned recipes.
  VPBasicBlock *clone() override;

  /// Returns the predecessor block at index \p Idx with the predecessors as per
  /// the corresponding plain CFG. If the block is an entry block to a region,
  /// the first predecessor is the single predecessor of a region, and the
  /// second predecessor is the exiting block of the region.
  const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;

protected:
  /// Execute the recipes in the IR basic block \p BB.
  void executeRecipes(VPTransformState *State, BasicBlock *BB);

  /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
  /// generated for this VPBB.
  void connectToPredecessors(VPTransformState &State);

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState &State);
};
4381
/// Return the incoming VPBasicBlock with index \p Idx, looked up via the
/// plain-CFG predecessors of the recipe's parent block.
inline const VPBasicBlock *
VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
  return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}
4386
/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block. VPIRBasicBlocks are used, e.g., to wrap the entry, scalar
/// header and exit blocks of a VPlan (see VPlan::ScalarHeader and
/// VPlan::ExitBlocks).
class VPIRBasicBlock : public VPBasicBlock {
  friend class VPlan;

  /// The IR basic block wrapped by this VPIRBasicBlock.
  BasicBlock *IRBB;

  /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
  VPIRBasicBlock(BasicBlock *IRBB)
      : VPBasicBlock(VPIRBasicBlockSC,
                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
        IRBB(IRBB) {}

public:
  ~VPIRBasicBlock() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Clone the current block, keeping the wrapped IR basic block.
  VPIRBasicBlock *clone() override;

  /// Return the IR basic block wrapped by this VPIRBasicBlock.
  BasicBlock *getIRBasicBlock() const { return IRBB; }
};
4418
4419/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4420/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4421/// A VPRegionBlock may indicate that its contents are to be replicated several
4422/// times. This is designed to support predicated scalarization, in which a
4423/// scalar if-then code structure needs to be generated VF * UF times. Having
4424/// this replication indicator helps to keep a single model for multiple
4425/// candidate VF's. The actual replication takes place only once the desired VF
4426/// and UF have been determined.
class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
  friend class VPlan;

  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

  /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    if (Entry) {
      assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
      assert(Exiting && "Must also pass Exiting if Entry is passed.");
      assert(!Exiting->hasSuccessors() && "Exit block has successors.");
      Entry->setParent(this);
      Exiting->setParent(this);
    }
  }

public:
  ~VPRegionBlock() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(!EntryBlock->hasPredecessors() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(!ExitingBlock->hasSuccessors() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this region.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// Clone all blocks in the single-entry single-exit region of the block and
  /// their recipes without updating the operands of the cloned recipes.
  VPRegionBlock *clone() override;

  /// Remove the current region from its VPlan, connecting its predecessor to
  /// its entry, and its exiting block to its successor.
  void dissolveToCFGLoop();

  /// Returns the canonical induction recipe of the region.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path. TODO: Unify both code paths.
      EntryVPBB = cast<VPBasicBlock>(Val: EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(Val: &*EntryVPBB->begin());
  }
  const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
    return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
  }

  /// Return the type of the canonical IV for loop regions.
  Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
  const Type *getCanonicalIVType() const {
    return getCanonicalIV()->getScalarType();
  }
};
4543
/// Return the region enclosing the recipe's parent block.
inline VPRegionBlock *VPRecipeBase::getRegion() {
  return getParent()->getParent();
}

/// Return the region enclosing the recipe's parent block (const version).
inline const VPRegionBlock *VPRecipeBase::getRegion() const {
  return getParent()->getParent();
}
4551
/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
4557class VPlan {
4558 friend class VPlanPrinter;
4559 friend class VPSlotTracker;
4560
4561 /// VPBasicBlock corresponding to the original preheader. Used to place
4562 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4563 /// rest of VPlan execution.
4564 /// When this VPlan is used for the epilogue vector loop, the entry will be
4565 /// replaced by a new entry block created during skeleton creation.
4566 VPBasicBlock *Entry;
4567
4568 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4569 VPIRBasicBlock *ScalarHeader;
4570
4571 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4572 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4573 /// e.g. if the scalar epilogue always executes.
4574 SmallVector<VPIRBasicBlock *, 2> ExitBlocks;
4575
4576 /// Holds the VFs applicable to this VPlan.
4577 SmallSetVector<ElementCount, 2> VFs;
4578
4579 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4580 /// any UF.
4581 SmallSetVector<unsigned, 2> UFs;
4582
4583 /// Holds the name of the VPlan, for printing.
4584 std::string Name;
4585
4586 /// Represents the trip count of the original loop, for folding
4587 /// the tail.
4588 VPValue *TripCount = nullptr;
4589
4590 /// Represents the backedge taken count of the original loop, for folding
4591 /// the tail. It equals TripCount - 1.
4592 VPSymbolicValue *BackedgeTakenCount = nullptr;
4593
4594 /// Represents the vector trip count.
4595 VPSymbolicValue VectorTripCount;
4596
4597 /// Represents the vectorization factor of the loop.
4598 VPSymbolicValue VF;
4599
4600 /// Represents the unroll factor of the loop.
4601 VPSymbolicValue UF;
4602
4603 /// Represents the loop-invariant VF * UF of the vector loop region.
4604 VPSymbolicValue VFxUF;
4605
4606 /// Contains all the external definitions created for this VPlan, as a mapping
4607 /// from IR Values to VPIRValues.
4608 SmallMapVector<Value *, VPIRValue *, 16> LiveIns;
4609
4610 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4611 /// VPlan is destroyed.
4612 SmallVector<VPBlockBase *> CreatedBlocks;
4613
4614 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4615 /// wrapping the original header of the scalar loop.
4616 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4617 : Entry(Entry), ScalarHeader(ScalarHeader) {
4618 Entry->setPlan(this);
4619 assert(ScalarHeader->getNumSuccessors() == 0 &&
4620 "scalar header must be a leaf node");
4621 }
4622
4623public:
4624 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4625 /// original preheader and scalar header of \p L, to be used as entry and
4626 /// scalar header blocks of the new VPlan.
4627 VPlan(Loop *L);
4628
4629 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4630 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4631 VPlan(BasicBlock *ScalarHeaderBB) {
4632 setEntry(createVPBasicBlock(Name: "preheader"));
4633 ScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderBB);
4634 }
4635
4636 LLVM_ABI_FOR_TEST ~VPlan();
4637
4638 void setEntry(VPBasicBlock *VPBB) {
4639 Entry = VPBB;
4640 VPBB->setPlan(this);
4641 }
4642
4643 /// Generate the IR code for this VPlan.
4644 void execute(VPTransformState *State);
4645
4646 /// Return the cost of this plan.
4647 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
4648
4649 VPBasicBlock *getEntry() { return Entry; }
4650 const VPBasicBlock *getEntry() const { return Entry; }
4651
4652 /// Returns the preheader of the vector loop region, if one exists, or null
4653 /// otherwise.
4654 VPBasicBlock *getVectorPreheader() {
4655 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4656 return VectorRegion
4657 ? cast<VPBasicBlock>(Val: VectorRegion->getSinglePredecessor())
4658 : nullptr;
4659 }
4660
4661 /// Returns the VPRegionBlock of the vector loop.
4662 LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
4663 LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;
4664
4665 /// Returns the 'middle' block of the plan, that is the block that selects
4666 /// whether to execute the scalar tail loop or the exit block from the loop
4667 /// latch. If there is an early exit from the vector loop, the middle block
4668 /// conceptully has the early exit block as third successor, split accross 2
4669 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4670 /// tail loop or the exit block. If the scalar tail loop or exit block are
4671 /// known to always execute, the middle block may branch directly to that
4672 /// block. This function cannot be called once the vector loop region has been
4673 /// removed.
4674 VPBasicBlock *getMiddleBlock() {
4675 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4676 assert(
4677 LoopRegion &&
4678 "cannot call the function after vector loop region has been removed");
4679 // The middle block is always the last successor of the region.
4680 return cast<VPBasicBlock>(Val: LoopRegion->getSuccessors().back());
4681 }
4682
4683 const VPBasicBlock *getMiddleBlock() const {
4684 return const_cast<VPlan *>(this)->getMiddleBlock();
4685 }
4686
4687 /// Return the VPBasicBlock for the preheader of the scalar loop.
4688 VPBasicBlock *getScalarPreheader() const {
4689 return cast<VPBasicBlock>(Val: getScalarHeader()->getSinglePredecessor());
4690 }
4691
4692 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4693 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4694
4695 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4696 /// the original scalar loop.
4697 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4698
4699 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4700 /// exit block.
4701 VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const;
4702
4703 /// Returns true if \p VPBB is an exit block.
4704 bool isExitBlock(VPBlockBase *VPBB);
4705
4706 /// The trip count of the original loop.
4707 VPValue *getTripCount() const {
4708 assert(TripCount && "trip count needs to be set before accessing it");
4709 return TripCount;
4710 }
4711
4712 /// Set the trip count assuming it is currently null; if it is not - use
4713 /// resetTripCount().
4714 void setTripCount(VPValue *NewTripCount) {
4715 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4716 TripCount = NewTripCount;
4717 }
4718
4719 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4720 /// the original trip count have been replaced.
4721 void resetTripCount(VPValue *NewTripCount) {
4722 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4723 "TripCount must be set when resetting");
4724 TripCount = NewTripCount;
4725 }
4726
4727 /// The backedge taken count of the original loop.
4728 VPValue *getOrCreateBackedgeTakenCount() {
4729 if (!BackedgeTakenCount)
4730 BackedgeTakenCount = new VPSymbolicValue();
4731 return BackedgeTakenCount;
4732 }
4733 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4734
4735 /// The vector trip count.
4736 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4737
4738 /// Returns the VF of the vector loop region.
4739 VPSymbolicValue &getVF() { return VF; };
4740 const VPSymbolicValue &getVF() const { return VF; };
4741
4742 /// Returns the UF of the vector loop region.
4743 VPSymbolicValue &getUF() { return UF; };
4744
4745 /// Returns VF * UF of the vector loop region.
4746 VPSymbolicValue &getVFxUF() { return VFxUF; }
4747
4748 LLVMContext &getContext() const {
4749 return getScalarHeader()->getIRBasicBlock()->getContext();
4750 }
4751
4752 const DataLayout &getDataLayout() const {
4753 return getScalarHeader()->getIRBasicBlock()->getDataLayout();
4754 }
4755
4756 void addVF(ElementCount VF) { VFs.insert(X: VF); }
4757
4758 void setVF(ElementCount VF) {
4759 assert(hasVF(VF) && "Cannot set VF not already in plan");
4760 VFs.clear();
4761 VFs.insert(X: VF);
4762 }
4763
4764 /// Remove \p VF from the plan.
4765 void removeVF(ElementCount VF) {
4766 assert(hasVF(VF) && "tried to remove VF not present in plan");
4767 VFs.remove(X: VF);
4768 }
4769
4770 bool hasVF(ElementCount VF) const { return VFs.count(key: VF); }
4771 bool hasScalableVF() const {
4772 return any_of(Range: VFs, P: [](ElementCount VF) { return VF.isScalable(); });
4773 }
4774
4775 /// Returns an iterator range over all VFs of the plan.
4776 iterator_range<SmallSetVector<ElementCount, 2>::iterator>
4777 vectorFactors() const {
4778 return VFs;
4779 }
4780
4781 bool hasScalarVFOnly() const {
4782 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4783 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4784 "Plan with scalar VF should only have a single VF");
4785 return HasScalarVFOnly;
4786 }
4787
4788 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(key: UF); }
4789
4790 /// Returns the concrete UF of the plan, after unrolling.
4791 unsigned getConcreteUF() const {
4792 assert(UFs.size() == 1 && "Expected a single UF");
4793 return UFs[0];
4794 }
4795
4796 void setUF(unsigned UF) {
4797 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4798 UFs.clear();
4799 UFs.insert(X: UF);
4800 }
4801
4802 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4803 /// concrete UF.
4804 bool isUnrolled() const { return UFs.size() == 1; }
4805
4806 /// Return a string with the name of the plan and the applicable VFs and UFs.
4807 std::string getName() const;
4808
4809 void setName(const Twine &newName) { Name = newName.str(); }
4810
4811 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4812 /// yet) for \p V.
4813 VPIRValue *getOrAddLiveIn(Value *V) {
4814 assert(V && "Trying to get or add the VPIRValue of a null Value");
4815 auto [It, Inserted] = LiveIns.try_emplace(Key: V);
4816 if (Inserted) {
4817 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
4818 It->second = new VPConstantInt(CI);
4819 else
4820 It->second = new VPIRValue(V);
4821 }
4822
4823 assert(isa<VPIRValue>(It->second) &&
4824 "Only VPIRValues should be in mapping");
4825 return It->second;
4826 }
4827 VPIRValue *getOrAddLiveIn(VPIRValue *V) {
4828 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4829 return getOrAddLiveIn(V: V->getValue());
4830 }
4831
4832 /// Return a VPIRValue wrapping i1 true.
4833 VPIRValue *getTrue() { return getConstantInt(BitWidth: 1, Val: 1); }
4834
4835 /// Return a VPIRValue wrapping i1 false.
4836 VPIRValue *getFalse() { return getConstantInt(BitWidth: 1, Val: 0); }
4837
4838 /// Return a VPIRValue wrapping the null value of type \p Ty.
4839 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, Val: 0); }
4840
4841 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4842 VPIRValue *getAllOnesValue(Type *Ty) {
4843 return getConstantInt(Val: APInt::getAllOnes(numBits: Ty->getIntegerBitWidth()));
4844 }
4845
4846 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4847 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4848 return getOrAddLiveIn(V: ConstantInt::get(Ty, V: Val, IsSigned));
4849 }
4850
4851 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4852 /// value.
4853 VPIRValue *getConstantInt(unsigned BitWidth, uint64_t Val,
4854 bool IsSigned = false) {
4855 return getConstantInt(Val: APInt(BitWidth, Val, IsSigned));
4856 }
4857
4858 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4859 VPIRValue *getConstantInt(const APInt &Val) {
4860 return getOrAddLiveIn(V: ConstantInt::get(Context&: getContext(), V: Val));
4861 }
4862
4863 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4864 /// otherwise.
4865 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(Key: V); }
4866
4867 /// Return the list of live-in VPValues available in the VPlan.
4868 auto getLiveIns() const { return LiveIns.values(); }
4869
4870#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4871 /// Print the live-ins of this VPlan to \p O.
4872 void printLiveIns(raw_ostream &O) const;
4873
4874 /// Print this VPlan to \p O.
4875 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4876
4877 /// Print this VPlan in DOT format to \p O.
4878 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4879
4880 /// Dump the plan to stderr (for debugging).
4881 LLVM_DUMP_METHOD void dump() const;
4882#endif
4883
4884 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4885 /// recipes to refer to the clones, and return it.
4886 LLVM_ABI_FOR_TEST VPlan *duplicate();
4887
4888 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4889 /// present. The returned block is owned by the VPlan and deleted once the
4890 /// VPlan is destroyed.
4891 VPBasicBlock *createVPBasicBlock(const Twine &Name,
4892 VPRecipeBase *Recipe = nullptr) {
4893 auto *VPB = new VPBasicBlock(Name, Recipe);
4894 CreatedBlocks.push_back(Elt: VPB);
4895 return VPB;
4896 }
4897
4898 /// Create a new loop region with \p Name and entry and exiting blocks set
4899 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4900 /// owned by the VPlan and deleted once the VPlan is destroyed.
4901 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4902 VPBlockBase *Entry = nullptr,
4903 VPBlockBase *Exiting = nullptr) {
4904 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4905 CreatedBlocks.push_back(Elt: VPB);
4906 return VPB;
4907 }
4908
4909 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4910 /// returned block is owned by the VPlan and deleted once the VPlan is
4911 /// destroyed.
4912 VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
4913 const std::string &Name = "") {
4914 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4915 CreatedBlocks.push_back(Elt: VPB);
4916 return VPB;
4917 }
4918
4919 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4920 /// VPIRInstructions wrapping the instructions in t\p IRBB. The returned
4921 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4922 VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);
4923
4924 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4925 /// instructions in \p IRBB, except its terminator which is managed by the
4926 /// successors of the block in VPlan. The returned block is owned by the VPlan
4927 /// and deleted once the VPlan is destroyed.
4928 LLVM_ABI_FOR_TEST VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);
4929
4930 /// Returns true if the VPlan is based on a loop with an early exit. That is
4931 /// the case if the VPlan has either more than one exit block or a single exit
4932 /// block with multiple predecessors (one for the exit via the latch and one
4933 /// via the other early exit).
4934 bool hasEarlyExit() const {
4935 return count_if(Range: ExitBlocks,
4936 P: [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4937 1 ||
4938 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4939 }
4940
4941 /// Returns true if the scalar tail may execute after the vector loop. Note
4942 /// that this relies on unneeded branches to the scalar tail loop being
4943 /// removed.
4944 bool hasScalarTail() const {
4945 return !(!getScalarPreheader()->hasPredecessors() ||
4946 getScalarPreheader()->getSinglePredecessor() == getEntry());
4947 }
4948};
4949
4950#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print \p Plan to \p OS via VPlan::print, returning \p OS to allow chaining.
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
4955#endif
4956
4957} // end namespace llvm
4958
4959#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
4960