//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanValue.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstructionCost.h"
#include <cassert>
#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <variant>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
struct VPTransformState;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;

struct VPCostContext;

namespace Intrinsic {
typedef unsigned ID;
}

using VPlanPtr = std::unique_ptr<VPlan>;

/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class LLVM_ABI_FOR_TEST VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos != Predecessors.end() && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos != Successors.end() && "Successor does not exist");
    Successors.erase(Pos);
  }

  /// This function replaces one predecessor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Predecessors, Old);
    assert(I != Predecessors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced predecessor must have the same parent");
    *I = New;
  }

  /// This function replaces one successor with another, useful when
  /// trying to replace an old block in the CFG with a new one.
  void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
    auto I = find(Successors, Old);
    assert(I != Successors.end());
    assert(Old->getParent() == New->getParent() &&
           "replaced successor must have the same parent");
    *I = New;
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclass of VPBlockBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPBlockBase objects. They are used for concrete
  /// type identification.
  using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }
  iterator_range<VPBlockBase **> predecessors() { return Predecessors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// Returns true if this block has any predecessors.
  bool hasPredecessors() const { return !Predecessors.empty(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }
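
  // A minimal illustrative sketch of the hierarchical queries; the names
  // Region and Exiting are hypothetical, not part of this API:
  //
  // \code
  //   // Exiting is the exiting VPBasicBlock of Region and has no successors
  //   // attached directly, so the query walks up and returns the successor
  //   // attached to Region itself.
  //   VPBlockBase *Exiting = Region->getExitingBasicBlock();
  //   VPBlockBase *Next = Exiting->getSingleHierarchicalSuccessor();
  // \endcode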

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
  /// This VPBlockBase must have no successors. This VPBlockBase is not added
  /// as predecessor of any VPBasicBlock in \p NewSuccs.
  void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
    assert(Successors.empty() && "Block successors already set.");
    for (auto *Succ : NewSuccs)
      appendSuccessor(Succ);
  }
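
  // For illustration only (hypothetical block names): wiring a two-way branch.
  // These setters update a single direction, so predecessor lists must be kept
  // in sync explicitly; VPBlockUtils provides helpers that update both sides.
  //
  // \code
  //   Entry->setTwoSuccessors(IfTrue, IfFalse);
  //   IfTrue->setPredecessors({Entry});
  //   IfFalse->setPredecessors({Entry});
  // \endcode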

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// Swap predecessors of the block. The block must have exactly 2
  /// predecessors.
  void swapPredecessors() {
    assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
    std::swap(Predecessors[0], Predecessors[1]);
  }

  /// Swap successors of the block. The block must have exactly 2 successors.
  // TODO: This should be part of introducing conditional branch recipes rather
  // than being independent.
  void swapSuccessors() {
    assert(Successors.size() == 2 && "must have 2 successors to swap");
    std::swap(Successors[0], Successors[1]);
  }

  /// Returns the index for \p Pred in the block's predecessor list.
  unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
    assert(count(Predecessors, Pred) == 1 &&
           "must have Pred exactly once in Predecessors");
    return std::distance(Predecessors.begin(), find(Predecessors, Pred));
  }

  /// Returns the index for \p Succ in the block's successor list.
  unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
    assert(count(Successors, Succ) == 1 &&
           "must have Succ exactly once in Successors");
    return std::distance(Successors.begin(), find(Successors, Succ));
  }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Return the cost of the block.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPBlockBase to \p O.
  void print(raw_ostream &O) const;

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif

  /// Clone the current block and its recipes without updating the operands of
  /// the cloned recipes, including all blocks in the single-entry single-exit
  /// region for VPRegionBlocks.
  virtual VPBlockBase *clone() = 0;
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class LLVM_ABI_FOR_TEST VPRecipeBase
    : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
      public VPDef,
      public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Subclass identifier (for isa/dyn_cast).
  const unsigned char SubclassID;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
  /// type identification.
  using VPRecipeTy = enum {
    VPBranchOnMaskSC,
    VPDerivedIVSC,
    VPExpandSCEVSC,
    VPExpressionSC,
    VPIRInstructionSC,
    VPInstructionSC,
    VPInterleaveEVLSC,
    VPInterleaveSC,
    VPReductionEVLSC,
    VPReductionSC,
    VPReplicateSC,
    VPScalarIVStepsSC,
    VPVectorPointerSC,
    VPVectorEndPointerSC,
    VPWidenCallSC,
    VPWidenCanonicalIVSC,
    VPWidenCastSC,
    VPWidenGEPSC,
    VPWidenIntrinsicSC,
    VPWidenLoadEVLSC,
    VPWidenLoadSC,
    VPWidenStoreEVLSC,
    VPWidenStoreSC,
    VPWidenSC,
    VPBlendSC,
    VPHistogramSC,
    // START: Phi-like recipes. Need to be kept together.
    VPWidenPHISC,
    VPPredInstPHISC,
    // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
    // VPHeaderPHIRecipe need to be kept together.
    VPCanonicalIVPHISC,
    VPActiveLaneMaskPHISC,
    VPEVLBasedIVPHISC,
    VPFirstOrderRecurrencePHISC,
    VPWidenIntOrFpInductionSC,
    VPWidenPointerInductionSC,
    VPReductionPHISC,
    // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
    // END: Phi-like recipes
    VPFirstPHISC = VPWidenPHISC,
    VPFirstHeaderPHISC = VPCanonicalIVPHISC,
    VPLastHeaderPHISC = VPReductionPHISC,
    VPLastPHISC = VPReductionPHISC,
  };

  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = DebugLoc::getUnknown())
      : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}

  ~VPRecipeBase() override = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// \return the VPRegionBlock which the recipe belongs to.
  VPRegionBlock *getRegion();
  const VPRegionBlock *getRegion() const;

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes
  /// it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();
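
  // A short usage sketch, assuming hypothetical recipes Def, IV and Dead:
  //
  // \code
  //   Def->moveAfter(IV);                // Unlink Def and re-insert it right
  //                                      // after IV, possibly across blocks.
  //   auto It = Dead->eraseFromParent(); // Unlink and delete Dead; It points
  //                                      // to the next recipe in the block.
  // \endcode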

  /// \return an ID for the concrete type of this object.
  unsigned getVPRecipeID() const { return SubclassID; }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) { return true; }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const;

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

  /// Return true if the recipe is a scalar cast.
  bool isScalarCast() const;

  /// Set the recipe's debug location to \p NewDL.
  void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Dump the recipe to stderr (for debugging).
  LLVM_ABI_FOR_TEST void dump() const;

  /// Print the recipe, delegating to printRecipe().
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const;
#endif

protected:
  /// Compute the cost of this recipe either using a recipe's specialized
  /// implementation or using the legacy cost model and the underlying
  /// instructions.
  virtual InstructionCost computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Each concrete VPRecipe prints itself, without printing common
  /// information, like debug info or metadata.
  virtual void printRecipe(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const = 0;
#endif
};

// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
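
// For illustration, a sketch of how a recipe is expected to use the macro;
// VPFooRecipe and its recipe ID are hypothetical:
//
// \code
//   class VPFooRecipe : public VPSingleDefRecipe {
//   public:
//     VP_CLASSOF_IMPL(VPRecipeBase::VPFooSC)
//     ...
//   };
//   // Now isa<VPFooRecipe>/dyn_cast<VPFooRecipe> work on VPRecipeBase,
//   // VPValue, VPUser and VPSingleDefRecipe pointers.
// \endcode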

/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue {
public:
  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    Value *UV, DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPRecipeID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPExpressionSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPVectorEndPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenIntrinsicSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
      return true;
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPInterleaveEVLSC:
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPIRInstructionSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
    case VPRecipeBase::VPHistogramSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPRecipeID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPSingleDefRecipe to dbgs() (for debugging).
  LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const;
#endif
};

/// Class to record and manage LLVM IR flags.
class VPIRFlags {
  enum class OperationType : unsigned char {
    Cmp,
    FCmp,
    OverflowingBinOp,
    Trunc,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    ReductionOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct TruncFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  struct NonNegFlagsTy {
    char NonNeg : 1;
    NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
    ExactFlagsTy(bool Exact) : IsExact(Exact) {}
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
  };
  /// Holds both the predicate and fast-math flags for floating-point
  /// comparisons.
  struct FCmpFlagsTy {
    CmpInst::Predicate Pred;
    FastMathFlagsTy FMFs;
  };
  /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
  struct ReductionFlagsTy {
    // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
    // additional kinds.
    unsigned char Kind : 6;
    // TODO: Derive order/in-loop from plan and remove here.
    unsigned char IsOrdered : 1;
    unsigned char IsInLoop : 1;
    FastMathFlagsTy FMFs;

    ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
                     FastMathFlags FMFs)
        : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
          IsInLoop(IsInLoop), FMFs(FMFs) {}
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    TruncFlagsTy TruncFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPNoWrapFlags GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    FCmpFlagsTy FCmpFlags;
    ReductionFlagsTy ReductionFlags;
    unsigned AllFlags;
  };

public:
  VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}

  VPIRFlags(Instruction &I) {
    if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
      OpType = OperationType::FCmp;
      FCmpFlags.Pred = FCmp->getPredicate();
      FCmpFlags.FMFs = FCmp->getFastMathFlags();
    } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
      OpType = OperationType::Trunc;
      TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags = GEP->getNoWrapFlags();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  VPIRFlags(CmpInst::Predicate Pred)
      : OpType(OperationType::Cmp), CmpPredicate(Pred) {}

  VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
      : OpType(OperationType::FCmp) {
    FCmpFlags.Pred = Pred;
    FCmpFlags.FMFs = FMFs;
  }

  VPIRFlags(WrapFlagsTy WrapFlags)
      : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  VPIRFlags(TruncFlagsTy TruncFlags)
      : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}

  VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}

  VPIRFlags(DisjointFlagsTy DisjointFlags)
      : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}

  VPIRFlags(NonNegFlagsTy NonNegFlags)
      : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}

  VPIRFlags(ExactFlagsTy ExactFlags)
      : OpType(OperationType::PossiblyExactOp), ExactFlags(ExactFlags) {}

  VPIRFlags(GEPNoWrapFlags GEPFlags)
      : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}

  VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
      : OpType(OperationType::ReductionOp),
        ReductionFlags(Kind, IsOrdered, IsInLoop, FMFs) {}

  void transferFlags(VPIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

  /// Only keep flags also present in \p Other. \p Other must have the same
  /// OpType as the current object.
  void intersectFlags(const VPIRFlags &Other);

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::Trunc:
      TruncFlags.HasNUW = false;
      TruncFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags = GEPNoWrapFlags::none();
      break;
    case OperationType::FPMathOp:
    case OperationType::FCmp:
    case OperationType::ReductionOp:
      getFMFsRef().NoNaNs = false;
      getFMFsRef().NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Apply the IR flags to \p I.
  void applyFlags(Instruction &I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I.setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::Trunc:
      I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
      I.setHasNoSignedWrap(TruncFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I.setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
      break;
    case OperationType::FPMathOp:
    case OperationType::FCmp: {
      const FastMathFlagsTy &F = getFMFsRef();
      I.setHasAllowReassoc(F.AllowReassoc);
      I.setHasNoNaNs(F.NoNaNs);
      I.setHasNoInfs(F.NoInfs);
      I.setHasNoSignedZeros(F.NoSignedZeros);
      I.setHasAllowReciprocal(F.AllowReciprocal);
      I.setHasAllowContract(F.AllowContract);
      I.setHasApproxFunc(F.ApproxFunc);
      break;
    }
    case OperationType::NonNegOp:
      I.setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::ReductionOp:
      llvm_unreachable("reduction ops should not use applyFlags");
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
           "recipe doesn't have a compare predicate");
    return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
  }

  void setPredicate(CmpInst::Predicate Pred) {
    assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
           "recipe doesn't have a compare predicate");
    if (OpType == OperationType::FCmp)
      FCmpFlags.Pred = Pred;
    else
      CmpPredicate = Pred;
  }

  GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

  /// Returns true if the recipe has a comparison predicate.
  bool hasPredicate() const {
    return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const {
    return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
           OpType == OperationType::ReductionOp;
  }

  LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const;

  /// Returns true if the recipe has non-negative flag.
  bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }

  bool isNonNeg() const {
    assert(OpType == OperationType::NonNegOp &&
           "recipe doesn't have a NNEG flag");
    return NonNegFlags.NonNeg;
  }

  bool hasNoUnsignedWrap() const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      return WrapFlags.HasNUW;
    case OperationType::Trunc:
      return TruncFlags.HasNUW;
    default:
      llvm_unreachable("recipe doesn't have a NUW flag");
    }
  }

  bool hasNoSignedWrap() const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      return WrapFlags.HasNSW;
    case OperationType::Trunc:
      return TruncFlags.HasNSW;
    default:
      llvm_unreachable("recipe doesn't have a NSW flag");
    }
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

  RecurKind getRecurKind() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return static_cast<RecurKind>(ReductionFlags.Kind);
  }

  bool isReductionOrdered() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsOrdered;
  }

  bool isReductionInLoop() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsInLoop;
  }

private:
  /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
  FastMathFlagsTy &getFMFsRef() {
    if (OpType == OperationType::FCmp)
      return FCmpFlags.FMFs;
    if (OpType == OperationType::ReductionOp)
      return ReductionFlags.FMFs;
    return FMFs;
  }
  const FastMathFlagsTy &getFMFsRef() const {
    if (OpType == OperationType::FCmp)
      return FCmpFlags.FMFs;
    if (OpType == OperationType::ReductionOp)
      return ReductionFlags.FMFs;
    return FMFs;
  }

public:
  /// Returns default flags for \p Opcode for opcodes that support it, asserts
  /// otherwise. Opcodes not supporting default flags include compares and
  /// ComputeReductionResult.
  static VPIRFlags getDefaultFlags(unsigned Opcode);

#if !defined(NDEBUG)
  /// Returns true if the set flags are valid for \p Opcode.
  LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;

  /// Returns true if \p Opcode has its required flags set.
  LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};
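
// A minimal usage sketch, assuming AddI is an existing 'add nuw nsw'
// instruction and NewI its freshly created replacement (both hypothetical):
// flags are captured once and can be dropped or re-applied independently of
// any instruction.
//
// \code
//   VPIRFlags Flags(*AddI);            // Records nuw/nsw.
//   Flags.dropPoisonGeneratingFlags(); // Clears nuw/nsw.
//   Flags.applyFlags(*NewI);           // NewI ends up without nuw/nsw.
// \endcode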

/// A pure-virtual common base class for recipes defining a single VPValue and
/// using IR flags.
struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      const VPIRFlags &Flags,
                      DebugLoc DL = DebugLoc::getUnknown())
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
           R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
           R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  static inline bool classof(const VPValue *V) {
    auto *R = V->getDefiningRecipe();
    return R && classof(R);
  }

  VPRecipeWithIRFlags *clone() override = 0;

  static inline bool classof(const VPSingleDefRecipe *R) {
    return classof(static_cast<const VPRecipeBase *>(R));
  }

  void execute(VPTransformState &State) override = 0;

  /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
  InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
                                             VPCostContext &Ctx) const;
};

/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};

/// Helper to manage IR metadata for recipes. It filters out metadata that
/// cannot be propagated.
class VPIRMetadata {
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;

public:
  VPIRMetadata() = default;

  /// Adds metadata that can be preserved from the original instruction
  /// \p I.
  VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }

  /// Copy constructor for cloning.
  VPIRMetadata(const VPIRMetadata &Other) = default;

  VPIRMetadata &operator=(const VPIRMetadata &Other) = default;

  /// Add all metadata to \p I.
  void applyMetadata(Instruction &I) const;

  /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
  /// already exists, it will be replaced. Otherwise, it will be added.
  void setMetadata(unsigned Kind, MDNode *Node) {
    auto It =
        llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
          return P.first == Kind;
        });
    if (It != Metadata.end())
      It->second = Node;
    else
      Metadata.emplace_back(Kind, Node);
  }

  /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
  /// nodes that are common to both.
  void intersect(const VPIRMetadata &MD);

  /// Get metadata of kind \p Kind. Returns nullptr if not found.
  MDNode *getMetadata(unsigned Kind) const {
    auto It =
        find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
    return It != Metadata.end() ? It->second : nullptr;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print metadata with node IDs.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
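
// A short sketch of the intended use, with hypothetical instructions ScalarI
// and WideI: metadata is harvested from the scalar instruction and re-applied
// to its widened counterpart.
//
// \code
//   VPIRMetadata MD(*ScalarI);  // Keeps only propagatable metadata kinds.
//   MD.setMetadata(LLVMContext::MD_nontemporal, Node); // Add or replace.
//   MD.applyMetadata(*WideI);   // Attaches all recorded kinds to WideI.
// \endcode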

/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any Recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
                                        public VPIRMetadata,
                                        public VPUnrollPartAccessor<1> {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    // Creates a mask where each lane is active (true) whilst the current
    // counter (first operand + index) is less than the second operand, i.e.
    // mask[i] = icmp ult (op0 + i), op1
    // The size of the mask returned is VF * Multiplier (UF, third op).
    ActiveLaneMask,
    ExplicitVectorLength,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    // Abstract instruction that compares two values and branches. This is
    // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
    BranchOnCount,
    BranchOnCond,
    // Branch with 2 boolean condition operands and 3 successors. If condition
    // 0 is true, branches to successor 0; if condition 1 is true, branches to
    // successor 1; otherwise branches to successor 2. Expanded after region
    // dissolution into: (1) an OR of the two conditions branching to
    // middle.split or successor 2, and (2) middle.split branching to successor
    // 0 or successor 1 based on condition 0.
    BranchOnTwoConds,
    Broadcast,
    /// Given operands of (the same) struct type, creates a struct of fixed-
    /// width vectors each containing a struct field of all operands. The
    /// number of operands matches the element count of every vector.
    BuildStructVector,
    /// Creates a fixed-width vector containing all operands. The number of
    /// operands matches the vector element count.
    BuildVector,
    /// Extracts all lanes from its (non-scalable) vector operand. This is an
    /// abstract VPInstruction whose single defined VPValue represents VF
    /// scalars extracted from a vector, to be replaced by VF ExtractElement
    /// VPInstructions.
    Unpack,
    /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
    /// where one of (x,y) is loop invariant, and both x and y are integer type.
    ComputeAnyOfResult,
    ComputeReductionResult,
    // Extracts the last part of its operand. Removed during unrolling.
    ExtractLastPart,
    // Extracts the last lane of its vector operand, per part.
    ExtractLastLane,
    // Extracts the second-to-last lane from its operand or the second-to-last
    // part if it is scalar. In the latter case, the recipe will be removed
    // during unrolling.
    ExtractPenultimateElement,
    LogicalAnd, // Non-poison propagating logical And.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only
    // or for all lanes, depending on its uses).
    PtrAdd,
    // Add a vector offset in bytes (second operand) to a scalar base pointer
    // (first operand).
    WidePtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its
    // (boolean) vector operands is true. It produces the reduced value across
    // all unrolled iterations. Unrolling will add all copies of its original
    // operand as additional operands. AnyOf is poison-safe as all operands
    // will be frozen.
    AnyOf,
    // Calculates the first active lane index of the vector predicate operands.
    // It produces the lane index across all unrolled iterations. Unrolling
    // will add all copies of its original operand as additional operands.
    // Implemented with @llvm.experimental.cttz.elts, but returns the expected
    // result even with operands that are all zeroes.
    FirstActiveLane,
    // Calculates the last active lane index of the vector predicate operands.
    // The predicates must be prefix-masks (all 1s before all 0s). Used when
    // tail-folding to extract the correct live-out value from the last active
    // iteration. It produces the lane index across all unrolled iterations.
    // Unrolling will add all copies of its original operand as additional
    // operands.
    LastActiveLane,
    // Returns a reversed vector for the operand.
    Reverse,

    // The opcodes below are used for VPInstructionWithType.
    //
    /// Scale the first operand (vector step) by the second operand
    /// (scalar-step). Casts both operands to the result type if needed.
    WideIVStep,
    /// Start vector for reductions with 3 operands: the original start value,
    /// the identity value for the reduction and an integer indicating the
    /// scaling factor.
    ReductionStartVector,
    // Creates a step vector starting from 0 to VF with a step of 1.
    StepVector,
    /// Extracts a single lane (first operand) from a set of vector operands.
    /// The lane specifies an index into a vector formed by combining all
    /// vector operands (all operands after the first one).
    ExtractLane,
    /// Explicit user for the resume phi of the canonical induction in the main
    /// VPlan, used by the epilogue vector loop.
    ResumeForEpilogue,
    /// Extracts the lane from the first operand corresponding to the last
    /// active (non-zero) lane in the mask (second operand), or if no lanes
    /// were active in the mask, returns the default value (third operand).
    ExtractLastActive,

    /// Returns the value for vscale.
    VScale,
    OpsEnd = VScale,
  };

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Return the number of operands determined by the opcode of the
  /// VPInstruction. Returns -1u if the number of operands cannot be determined
  /// directly by the opcode.
  static unsigned getNumOperandsForOpcode(unsigned Opcode);

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  std::string Name;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
                DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");

  VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)

  VPInstruction *clone() override {
    auto *New = new VPInstruction(Opcode, operands(), *this, *this,
                                  getDebugLoc(), Name);
    if (getUnderlyingValue())
      New->setUnderlyingValue(getUnderlyingInstr());
    return New;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnTwoConds:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the underlying opcode may read from or write to memory.
  bool opcodeMayReadOrWriteFromMemory() const;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }

  /// Set the symbolic name for the VPInstruction.
  void setName(StringRef NewName) { Name = NewName.str(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A specialization of VPInstruction augmenting it with a dedicated result
/// type, to be used when the opcode and operands of the VPInstruction don't
/// directly determine the result type. Note that there is no separate recipe
/// ID for VPInstructionWithType; it shares the same ID as VPInstruction and is
/// distinguished purely by the opcode.
class VPInstructionWithType : public VPInstruction {
  /// Scalar result type produced by the recipe.
  Type *ResultTy;

public:
  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
                        Type *ResultTy, const VPIRFlags &Flags = {},
                        const VPIRMetadata &Metadata = {},
                        DebugLoc DL = DebugLoc::getUnknown(),
                        const Twine &Name = "")
      : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
        ResultTy(ResultTy) {}

  static inline bool classof(const VPRecipeBase *R) {
    // VPInstructionWithType are VPInstructions with specific opcodes requiring
    // type information.
    if (R->isScalarCast())
      return true;
    auto *VPI = dyn_cast<VPInstruction>(R);
    if (!VPI)
      return false;
    switch (VPI->getOpcode()) {
    case VPInstruction::WideIVStep:
    case VPInstruction::StepVector:
    case VPInstruction::VScale:
      return true;
    default:
      return false;
    }
  }

  static inline bool classof(const VPUser *R) {
    return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
  }

  VPInstruction *clone() override {
    auto *New =
        new VPInstructionWithType(getOpcode(), operands(), getResultType(),
                                  *this, *this, getDebugLoc(), getName());
    New->setUnderlyingValue(getUnderlyingValue());
    return New;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  Type *getResultType() const { return ResultTy; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// Helper type to provide functions to access incoming values and blocks for
/// phi-like recipes.
class VPPhiAccessors {
protected:
  /// Return a VPRecipeBase* to the current object.
  virtual const VPRecipeBase *getAsRecipe() const = 0;

public:
  virtual ~VPPhiAccessors() = default;

  /// Returns the incoming VPValue with index \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return getAsRecipe()->getOperand(Idx);
  }

  /// Returns the incoming block with index \p Idx.
  const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

  /// Returns the number of incoming values, also number of incoming blocks.
  virtual unsigned getNumIncoming() const {
    return getAsRecipe()->getNumOperands();
  }

  /// Returns an iterator range over the incoming values.
  VPUser::const_operand_range incoming_values() const {
    return make_range(getAsRecipe()->op_begin(),
                      getAsRecipe()->op_begin() + getNumIncoming());
  }

  using const_incoming_blocks_range = iterator_range<mapped_iterator<
      detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;

  /// Returns an iterator range over the incoming blocks.
  const_incoming_blocks_range incoming_blocks() const {
    std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
      return getIncomingBlock(Idx);
    };
    return map_range(index_range(0, getNumIncoming()), GetBlock);
  }

  /// Returns an iterator range over pairs of incoming values and corresponding
  /// incoming blocks.
  detail::zippy<llvm::detail::zip_first, VPUser::const_operand_range,
                const_incoming_blocks_range>
  incoming_values_and_blocks() const {
    return zip_equal(incoming_values(), incoming_blocks());
  }

  /// Removes the incoming value for \p IncomingBlock, which must be a
  /// predecessor.
  void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
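
// An illustrative sketch (the phi recipe PhiR is hypothetical): the accessors
// allow walking incoming values together with the blocks they flow in from.
//
// \code
//   for (const auto &[IncV, IncBB] : PhiR->incoming_values_and_blocks())
//     dbgs() << "incoming from " << IncBB->getName() << "\n";
// \endcode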

struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors {
  VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
      : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}

  static inline bool classof(const VPUser *U) {
    auto *VPI = dyn_cast<VPInstruction>(U);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPValue *V) {
    auto *VPI = dyn_cast<VPInstruction>(V);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPSingleDefRecipe *SDR) {
    auto *VPI = dyn_cast<VPInstruction>(SDR);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  VPPhi *clone() override {
    auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
    PhiR->setUnderlyingValue(getUnderlyingValue());
    return PhiR;
  }

  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  const VPRecipeBase *getAsRecipe() const override { return this; }
};
1543
/// A recipe wrapping an original IR instruction that is not modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
1547class VPIRInstruction : public VPRecipeBase {
1548 Instruction &I;
1549
1550protected:
1551 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1552 /// subclasses may need to be created, e.g. VPIRPhi.
1553 VPIRInstruction(Instruction &I)
1554 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1555
1556public:
1557 ~VPIRInstruction() override = default;
1558
/// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1560 /// VPIRInstruction.
1561 LLVM_ABI_FOR_TEST static VPIRInstruction *create(Instruction &I);
1562
1563 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1564
1565 VPIRInstruction *clone() override {
1566 auto *R = create(I);
1567 for (auto *Op : operands())
1568 R->addOperand(Operand: Op);
1569 return R;
1570 }
1571
1572 void execute(VPTransformState &State) override;
1573
1574 /// Return the cost of this VPIRInstruction.
1575 LLVM_ABI_FOR_TEST InstructionCost
1576 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1577
1578 Instruction &getInstruction() const { return I; }
1579
1580 bool usesScalars(const VPValue *Op) const override {
1581 assert(is_contained(operands(), Op) &&
1582 "Op must be an operand of the recipe");
1583 return true;
1584 }
1585
1586 bool usesFirstPartOnly(const VPValue *Op) const override {
1587 assert(is_contained(operands(), Op) &&
1588 "Op must be an operand of the recipe");
1589 return true;
1590 }
1591
1592 bool usesFirstLaneOnly(const VPValue *Op) const override {
1593 assert(is_contained(operands(), Op) &&
1594 "Op must be an operand of the recipe");
1595 return true;
1596 }
1597
/// Update the recipe's first operand to the last lane of the last part of that
/// operand, using \p Builder. Must only be used for VPIRInstructions wrapping
/// a PHINode and having at least one operand.
1601 void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder);
1602
1603protected:
1604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1605 /// Print the recipe.
1606 void printRecipe(raw_ostream &O, const Twine &Indent,
1607 VPSlotTracker &SlotTracker) const override;
1608#endif
1609};
1610
/// An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use
/// of cast/dyn_cast/isa and an execute() implementation. A single VPValue
/// operand is allowed, and it is used to add a new incoming value for the
/// single predecessor VPBB.
1615struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
1616 public VPPhiAccessors {
1617 VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}
1618
1619 static inline bool classof(const VPRecipeBase *U) {
1620 auto *R = dyn_cast<VPIRInstruction>(Val: U);
1621 return R && isa<PHINode>(Val: R->getInstruction());
1622 }
1623
1624 PHINode &getIRPhi() { return cast<PHINode>(Val&: getInstruction()); }
1625
1626 void execute(VPTransformState &State) override;
1627
1628protected:
1629#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1630 /// Print the recipe.
1631 void printRecipe(raw_ostream &O, const Twine &Indent,
1632 VPSlotTracker &SlotTracker) const override;
1633#endif
1634
1635 const VPRecipeBase *getAsRecipe() const override { return this; }
1636};
1637
1638/// VPWidenRecipe is a recipe for producing a widened instruction using the
1639/// opcode and operands of the recipe. This recipe covers most of the
1640/// traditional vectorization cases where each recipe transforms into a
1641/// vectorized version of itself.
1642class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
1643 public VPIRMetadata {
1644 unsigned Opcode;
1645
1646public:
1647 VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
1648 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1649 DebugLoc DL = {})
1650 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1651 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1652 setUnderlyingValue(&I);
1653 }
1654
1655 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1656 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1657 DebugLoc DL = {})
1658 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1659 VPIRMetadata(Metadata), Opcode(Opcode) {}
1660
1661 ~VPWidenRecipe() override = default;
1662
1663 VPWidenRecipe *clone() override {
1664 if (auto *UV = getUnderlyingValue())
1665 return new VPWidenRecipe(*cast<Instruction>(Val: UV), operands(), *this, *this,
1666 getDebugLoc());
1667 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1668 }
1669
1670 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1671
1672 /// Produce a widened instruction using the opcode and operands of the recipe,
1673 /// processing State.VF elements.
1674 void execute(VPTransformState &State) override;
1675
1676 /// Return the cost of this VPWidenRecipe.
1677 InstructionCost computeCost(ElementCount VF,
1678 VPCostContext &Ctx) const override;
1679
1680 unsigned getOpcode() const { return Opcode; }
1681
1682protected:
1683#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1684 /// Print the recipe.
1685 void printRecipe(raw_ostream &O, const Twine &Indent,
1686 VPSlotTracker &SlotTracker) const override;
1687#endif
1688
1689 /// Returns true if the recipe only uses the first lane of operand \p Op.
1690 bool usesFirstLaneOnly(const VPValue *Op) const override {
1691 assert(is_contained(operands(), Op) &&
1692 "Op must be an operand of the recipe");
1693 return Opcode == Instruction::Select && Op == getOperand(N: 0) &&
1694 Op->isDefinedOutsideLoopRegions();
1695 }
1696};
1697
1698/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1699class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1700 /// Cast instruction opcode.
1701 Instruction::CastOps Opcode;
1702
1703 /// Result type for the cast.
1704 Type *ResultTy;
1705
1706public:
1707 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1708 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1709 const VPIRMetadata &Metadata = {},
1710 DebugLoc DL = DebugLoc::getUnknown())
1711 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1712 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1713 assert(flagsValidForOpcode(Opcode) &&
1714 "Set flags not supported for the provided opcode");
1715 assert(hasRequiredFlagsForOpcode(Opcode) &&
1716 "Opcode requires specific flags to be set");
1717 setUnderlyingValue(CI);
1718 }
1719
1720 ~VPWidenCastRecipe() override = default;
1721
1722 VPWidenCastRecipe *clone() override {
1723 return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy,
1724 cast_or_null<CastInst>(Val: getUnderlyingValue()),
1725 *this, *this, getDebugLoc());
1726 }
1727
1728 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1729
1730 /// Produce widened copies of the cast.
1731 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1732
1733 /// Return the cost of this VPWidenCastRecipe.
1734 LLVM_ABI_FOR_TEST InstructionCost
1735 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1736
1737 Instruction::CastOps getOpcode() const { return Opcode; }
1738
1739 /// Returns the result type of the cast.
1740 Type *getResultType() const { return ResultTy; }
1741
1742protected:
1743#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1744 /// Print the recipe.
1745 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1746 VPSlotTracker &SlotTracker) const override;
1747#endif
1748};
1749
1750/// A recipe for widening vector intrinsics.
1751class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1752 /// ID of the vector intrinsic to widen.
1753 Intrinsic::ID VectorIntrinsicID;
1754
1755 /// Scalar return type of the intrinsic.
1756 Type *ResultTy;
1757
1758 /// True if the intrinsic may read from memory.
1759 bool MayReadFromMemory;
1760
/// True if the intrinsic may write to memory.
1762 bool MayWriteToMemory;
1763
1764 /// True if the intrinsic may have side-effects.
1765 bool MayHaveSideEffects;
1766
1767public:
1768 VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
1769 ArrayRef<VPValue *> CallArguments, Type *Ty,
1770 const VPIRFlags &Flags = {},
1771 const VPIRMetadata &MD = {},
1772 DebugLoc DL = DebugLoc::getUnknown())
1773 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1774 Flags, DL),
1775 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1776 MayReadFromMemory(CI.mayReadFromMemory()),
1777 MayWriteToMemory(CI.mayWriteToMemory()),
1778 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1779 setUnderlyingValue(&CI);
1780 }
1781
1782 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
1783 ArrayRef<VPValue *> CallArguments, Type *Ty,
1784 const VPIRFlags &Flags = {},
1785 const VPIRMetadata &Metadata = {},
1786 DebugLoc DL = DebugLoc::getUnknown())
1787 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1788 Flags, DL),
1789 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1790 ResultTy(Ty) {
1791 LLVMContext &Ctx = Ty->getContext();
1792 AttributeSet Attrs = Intrinsic::getFnAttributes(C&: Ctx, id: VectorIntrinsicID);
1793 MemoryEffects ME = Attrs.getMemoryEffects();
1794 MayReadFromMemory = !ME.onlyWritesMemory();
1795 MayWriteToMemory = !ME.onlyReadsMemory();
1796 MayHaveSideEffects = MayWriteToMemory ||
1797 !Attrs.hasAttribute(Kind: Attribute::NoUnwind) ||
1798 !Attrs.hasAttribute(Kind: Attribute::WillReturn);
1799 }
1800
1801 ~VPWidenIntrinsicRecipe() override = default;
1802
1803 VPWidenIntrinsicRecipe *clone() override {
1804 if (Value *CI = getUnderlyingValue())
1805 return new VPWidenIntrinsicRecipe(*cast<CallInst>(Val: CI), VectorIntrinsicID,
1806 operands(), ResultTy, *this, *this,
1807 getDebugLoc());
1808 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1809 *this, *this, getDebugLoc());
1810 }
1811
1812 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1813
1814 /// Produce a widened version of the vector intrinsic.
1815 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1816
1817 /// Return the cost of this vector intrinsic.
1818 LLVM_ABI_FOR_TEST InstructionCost
1819 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1820
1821 /// Return the ID of the intrinsic.
1822 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1823
1824 /// Return the scalar return type of the intrinsic.
1825 Type *getResultType() const { return ResultTy; }
1826
/// Return the name of the intrinsic as a string.
1828 StringRef getIntrinsicName() const;
1829
1830 /// Returns true if the intrinsic may read from memory.
1831 bool mayReadFromMemory() const { return MayReadFromMemory; }
1832
1833 /// Returns true if the intrinsic may write to memory.
1834 bool mayWriteToMemory() const { return MayWriteToMemory; }
1835
1836 /// Returns true if the intrinsic may have side-effects.
1837 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1838
1839 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1840
1841protected:
1842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1843 /// Print the recipe.
1844 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1845 VPSlotTracker &SlotTracker) const override;
1846#endif
1847};
1848
1849/// A recipe for widening Call instructions using library calls.
1850class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
1851 public VPIRMetadata {
1852 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1853 /// between a given VF and the chosen vectorized variant, so there will be a
1854 /// different VPlan for each VF with a valid variant.
1855 Function *Variant;
1856
1857public:
1858 VPWidenCallRecipe(Value *UV, Function *Variant,
1859 ArrayRef<VPValue *> CallArguments,
1860 const VPIRFlags &Flags = {},
1861 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1862 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
1863 DL),
1864 VPIRMetadata(Metadata), Variant(Variant) {
1865 setUnderlyingValue(UV);
1866 assert(
1867 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1868 "last operand must be the called function");
1869 }
1870
1871 ~VPWidenCallRecipe() override = default;
1872
1873 VPWidenCallRecipe *clone() override {
1874 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1875 *this, *this, getDebugLoc());
1876 }
1877
1878 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
1879
1880 /// Produce a widened version of the call instruction.
1881 void execute(VPTransformState &State) override;
1882
1883 /// Return the cost of this VPWidenCallRecipe.
1884 InstructionCost computeCost(ElementCount VF,
1885 VPCostContext &Ctx) const override;
1886
1887 Function *getCalledScalarFunction() const {
1888 return cast<Function>(Val: getOperand(N: getNumOperands() - 1)->getLiveInIRValue());
1889 }
1890
1891 operand_range args() { return drop_end(RangeOrContainer: operands()); }
1892 const_operand_range args() const { return drop_end(RangeOrContainer: operands()); }
1893
1894protected:
1895#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1896 /// Print the recipe.
1897 void printRecipe(raw_ostream &O, const Twine &Indent,
1898 VPSlotTracker &SlotTracker) const override;
1899#endif
1900};
1901
1902/// A recipe representing a sequence of load -> update -> store as part of
1903/// a histogram operation. This means there may be aliasing between vector
1904/// lanes, which is handled by the llvm.experimental.vector.histogram family
1905/// of intrinsics. The only update operations currently supported are
1906/// 'add' and 'sub' where the other term is loop-invariant.
1907class VPHistogramRecipe : public VPRecipeBase {
1908 /// Opcode of the update operation, currently either add or sub.
1909 unsigned Opcode;
1910
1911public:
1912 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1913 DebugLoc DL = DebugLoc::getUnknown())
1914 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
1915 Opcode(Opcode) {}
1916
1917 ~VPHistogramRecipe() override = default;
1918
1919 VPHistogramRecipe *clone() override {
1920 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1921 }
1922
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
1924
1925 /// Produce a vectorized histogram operation.
1926 void execute(VPTransformState &State) override;
1927
1928 /// Return the cost of this VPHistogramRecipe.
1929 InstructionCost computeCost(ElementCount VF,
1930 VPCostContext &Ctx) const override;
1931
1932 unsigned getOpcode() const { return Opcode; }
1933
1934 /// Return the mask operand if one was provided, or a null pointer if all
1935 /// lanes should be executed unconditionally.
1936 VPValue *getMask() const {
1937 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
1938 }
1939
1940protected:
1941#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
1943 void printRecipe(raw_ostream &O, const Twine &Indent,
1944 VPSlotTracker &SlotTracker) const override;
1945#endif
1946};
1947
1948/// A recipe for handling GEP instructions.
1949class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
1950 Type *SourceElementTy;
1951
1952 bool isPointerLoopInvariant() const {
1953 return getOperand(N: 0)->isDefinedOutsideLoopRegions();
1954 }
1955
1956 bool isIndexLoopInvariant(unsigned I) const {
1957 return getOperand(N: I + 1)->isDefinedOutsideLoopRegions();
1958 }
1959
1960public:
1961 VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
1962 const VPIRFlags &Flags = {},
1963 DebugLoc DL = DebugLoc::getUnknown())
1964 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
1965 SourceElementTy(GEP->getSourceElementType()) {
1966 setUnderlyingValue(GEP);
1967 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1968 (void)Metadata;
1969 getMetadataToPropagate(Inst: GEP, Metadata);
1970 assert(Metadata.empty() && "unexpected metadata on GEP");
1971 }
1972
1973 ~VPWidenGEPRecipe() override = default;
1974
1975 VPWidenGEPRecipe *clone() override {
1976 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()),
1977 operands(), *this, getDebugLoc());
1978 }
1979
1980 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
1981
1982 /// This recipe generates a GEP instruction.
1983 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1984
1985 /// Generate the gep nodes.
1986 void execute(VPTransformState &State) override;
1987
1988 Type *getSourceElementType() const { return SourceElementTy; }
1989
1990 /// Return the cost of this VPWidenGEPRecipe.
1991 InstructionCost computeCost(ElementCount VF,
1992 VPCostContext &Ctx) const override {
1993 // TODO: Compute accurate cost after retiring the legacy cost model.
1994 return 0;
1995 }
1996
1997 /// Returns true if the recipe only uses the first lane of operand \p Op.
1998 bool usesFirstLaneOnly(const VPValue *Op) const override;
1999
2000protected:
2001#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2002 /// Print the recipe.
2003 void printRecipe(raw_ostream &O, const Twine &Indent,
2004 VPSlotTracker &SlotTracker) const override;
2005#endif
2006};
2007
/// A recipe to compute a pointer to the last element of each part of a widened
/// memory access of SourceElementTy. Used for VPWidenMemoryRecipes or
/// VPInterleaveRecipes that are reversed.
2011class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
2012 public VPUnrollPartAccessor<2> {
2013 Type *SourceElementTy;
2014
2015 /// The constant stride of the pointer computed by this recipe, expressed in
2016 /// units of SourceElementTy.
2017 int64_t Stride;
2018
2019public:
2020 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2021 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2022 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2023 GEPFlags, DL),
2024 SourceElementTy(SourceElementTy), Stride(Stride) {
2025 assert(Stride < 0 && "Stride must be negative");
2026 }
2027
2028 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2029
2030 Type *getSourceElementType() const { return SourceElementTy; }
2031 VPValue *getVFValue() { return getOperand(N: 1); }
2032 const VPValue *getVFValue() const { return getOperand(N: 1); }
2033
2034 void execute(VPTransformState &State) override;
2035
2036 bool usesFirstLaneOnly(const VPValue *Op) const override {
2037 assert(is_contained(operands(), Op) &&
2038 "Op must be an operand of the recipe");
2039 return true;
2040 }
2041
/// Return the cost of this VPVectorEndPointerRecipe.
2043 InstructionCost computeCost(ElementCount VF,
2044 VPCostContext &Ctx) const override {
2045 // TODO: Compute accurate cost after retiring the legacy cost model.
2046 return 0;
2047 }
2048
2049 /// Returns true if the recipe only uses the first part of operand \p Op.
2050 bool usesFirstPartOnly(const VPValue *Op) const override {
2051 assert(is_contained(operands(), Op) &&
2052 "Op must be an operand of the recipe");
2053 assert(getNumOperands() <= 2 && "must have at most two operands");
2054 return true;
2055 }
2056
2057 VPVectorEndPointerRecipe *clone() override {
2058 return new VPVectorEndPointerRecipe(getOperand(N: 0), getVFValue(),
2059 getSourceElementType(), Stride,
2060 getGEPNoWrapFlags(), getDebugLoc());
2061 }
2062
2063protected:
2064#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2065 /// Print the recipe.
2066 void printRecipe(raw_ostream &O, const Twine &Indent,
2067 VPSlotTracker &SlotTracker) const override;
2068#endif
2069};
2070
/// A recipe to compute the pointers for widened memory accesses of \p
/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts
/// greater than 0, for which the recipe produces `GEP Ptr, Offset`. The offset
/// for unrolled part 0 is 0.
2074class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
2075 Type *SourceElementTy;
2076
2077public:
2078 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2079 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2080 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2081 SourceElementTy(SourceElementTy) {}
2082
2083 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2084
2085 VPValue *getOffset() {
2086 return getNumOperands() == 2 ? getOperand(N: 1) : nullptr;
2087 }
2088
2089 void execute(VPTransformState &State) override;
2090
2091 Type *getSourceElementType() const { return SourceElementTy; }
2092
2093 bool usesFirstLaneOnly(const VPValue *Op) const override {
2094 assert(is_contained(operands(), Op) &&
2095 "Op must be an operand of the recipe");
2096 return true;
2097 }
2098
2099 /// Returns true if the recipe only uses the first part of operand \p Op.
2100 bool usesFirstPartOnly(const VPValue *Op) const override {
2101 assert(is_contained(operands(), Op) &&
2102 "Op must be an operand of the recipe");
2103 assert(getNumOperands() <= 2 && "must have at most two operands");
2104 return true;
2105 }
2106
2107 VPVectorPointerRecipe *clone() override {
2108 auto *Clone = new VPVectorPointerRecipe(getOperand(N: 0), SourceElementTy,
2109 getGEPNoWrapFlags(), getDebugLoc());
2110 if (auto *Off = getOffset())
2111 Clone->addOperand(Operand: Off);
2112 return Clone;
2113 }
2114
/// Return the cost of this VPVectorPointerRecipe.
2116 InstructionCost computeCost(ElementCount VF,
2117 VPCostContext &Ctx) const override {
2118 // TODO: Compute accurate cost after retiring the legacy cost model.
2119 return 0;
2120 }
2121
2122protected:
2123#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2124 /// Print the recipe.
2125 void printRecipe(raw_ostream &O, const Twine &Indent,
2126 VPSlotTracker &SlotTracker) const override;
2127#endif
2128};
2129
2130/// A pure virtual base class for all recipes modeling header phis, including
2131/// phis for first order recurrences, pointer inductions and reductions. The
2132/// start value is the first operand of the recipe and the incoming value from
2133/// the backedge is the second operand.
2134///
2135/// Inductions are modeled using the following sub-classes:
2136/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2137/// starting at a specified value (zero for the main vector loop, the resume
2138/// value for the epilogue vector loop) and stepping by 1. The induction
2139/// controls exiting of the vector loop by comparing against the vector trip
2140/// count. Produces a single scalar PHI for the induction value per
2141/// iteration.
2142/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2143/// floating point inductions with arbitrary start and step values. Produces
2144/// a vector PHI per-part.
2145/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2146/// value of an IV with different start and step values. Produces a single
/// scalar value per iteration.
2148/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2149/// canonical or derived induction.
2150/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2151/// pointer induction. Produces either a vector PHI per-part or scalar values
2152/// per-lane based on the canonical induction.
2153class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
2154 public VPPhiAccessors {
2155protected:
2156 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2157 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2158 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2159
2160 const VPRecipeBase *getAsRecipe() const override { return this; }
2161
2162public:
2163 ~VPHeaderPHIRecipe() override = default;
2164
2165 /// Method to support type inquiry through isa, cast, and dyn_cast.
2166 static inline bool classof(const VPRecipeBase *R) {
2167 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2168 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2169 }
2170 static inline bool classof(const VPValue *V) {
2171 return isa<VPHeaderPHIRecipe>(Val: V->getDefiningRecipe());
2172 }
2173 static inline bool classof(const VPSingleDefRecipe *R) {
2174 return isa<VPHeaderPHIRecipe>(Val: static_cast<const VPRecipeBase *>(R));
2175 }
2176
2177 /// Generate the phi nodes.
2178 void execute(VPTransformState &State) override = 0;
2179
2180 /// Return the cost of this header phi recipe.
2181 InstructionCost computeCost(ElementCount VF,
2182 VPCostContext &Ctx) const override;
2183
2184 /// Returns the start value of the phi, if one is set.
2185 VPValue *getStartValue() {
2186 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2187 }
2188 VPValue *getStartValue() const {
2189 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2190 }
2191
2192 /// Update the start value of the recipe.
2193 void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }
2194
2195 /// Returns the incoming value from the loop backedge.
2196 virtual VPValue *getBackedgeValue() {
2197 return getOperand(N: 1);
2198 }
2199
2200 /// Update the incoming value from the loop backedge.
2201 void setBackedgeValue(VPValue *V) { setOperand(I: 1, New: V); }
2202
2203 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2204 /// to be a recipe.
2205 virtual VPRecipeBase &getBackedgeRecipe() {
2206 return *getBackedgeValue()->getDefiningRecipe();
2207 }
2208
2209protected:
2210#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2211 /// Print the recipe.
2212 void printRecipe(raw_ostream &O, const Twine &Indent,
2213 VPSlotTracker &SlotTracker) const override = 0;
2214#endif
2215};
2216
2217/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2218/// VPWidenPointerInductionRecipe), providing shared functionality, including
2219/// retrieving the step value, induction descriptor and original phi node.
2220class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
2221 const InductionDescriptor &IndDesc;
2222
2223public:
2224 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2225 VPValue *Step, const InductionDescriptor &IndDesc,
2226 DebugLoc DL)
2227 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2228 addOperand(Operand: Step);
2229 }
2230
2231 static inline bool classof(const VPRecipeBase *R) {
2232 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2233 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2234 }
2235
2236 static inline bool classof(const VPValue *V) {
2237 auto *R = V->getDefiningRecipe();
2238 return R && classof(R);
2239 }
2240
2241 static inline bool classof(const VPSingleDefRecipe *R) {
2242 return classof(R: static_cast<const VPRecipeBase *>(R));
2243 }
2244
2245 void execute(VPTransformState &State) override = 0;
2246
2247 /// Returns the start value of the induction.
2248 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2249
2250 /// Returns the step value of the induction.
2251 VPValue *getStepValue() { return getOperand(N: 1); }
2252 const VPValue *getStepValue() const { return getOperand(N: 1); }
2253
2254 /// Update the step value of the recipe.
2255 void setStepValue(VPValue *V) { setOperand(I: 1, New: V); }
2256
2257 VPValue *getVFValue() { return getOperand(N: 2); }
2258 const VPValue *getVFValue() const { return getOperand(N: 2); }
2259
/// Returns the number of incoming values, which is also the number of
/// incoming blocks.
2261 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2262 /// incoming value, its start value.
2263 unsigned getNumIncoming() const override { return 1; }
2264
2265 PHINode *getPHINode() const { return cast<PHINode>(Val: getUnderlyingValue()); }
2266
2267 /// Returns the induction descriptor for the recipe.
2268 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2269
2270 VPValue *getBackedgeValue() override {
2271 // TODO: All operands of base recipe must exist and be at same index in
2272 // derived recipe.
2273 llvm_unreachable(
2274 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2275 }
2276
2277 VPRecipeBase &getBackedgeRecipe() override {
2278 // TODO: All operands of base recipe must exist and be at same index in
2279 // derived recipe.
2280 llvm_unreachable(
2281 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2282 }
2283
2284 /// Returns true if the recipe only uses the first lane of operand \p Op.
2285 bool usesFirstLaneOnly(const VPValue *Op) const override {
2286 assert(is_contained(operands(), Op) &&
2287 "Op must be an operand of the recipe");
2288 // The recipe creates its own wide start value, so it only requests the
2289 // first lane of the operand.
2290 // TODO: Remove once creating the start value is modeled separately.
2291 return Op == getStartValue() || Op == getStepValue();
2292 }
2293};
2294
2295/// A recipe for handling phi nodes of integer and floating-point inductions,
2296/// producing their vector values. This is an abstract recipe and must be
2297/// converted to concrete recipes before executing.
2298class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
2299 public VPIRFlags {
2300 TruncInst *Trunc;
2301
2302 // If this recipe is unrolled it will have 2 additional operands.
2303 bool isUnrolled() const { return getNumOperands() == 5; }
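
// Illustrative operand layout (inferred from the accessors below, stated here
// as an assumption): before unrolling the operands are [Start, Step, VF];
// after unrolling they become [Start, Step, VF, SplatVF, LastUnrolledPart],
// hence isUnrolled() checks for 5 operands.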
2304
2305public:
2306 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2307 VPValue *VF, const InductionDescriptor &IndDesc,
2308 const VPIRFlags &Flags, DebugLoc DL)
2309 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2310 Start, Step, IndDesc, DL),
2311 VPIRFlags(Flags), Trunc(nullptr) {
2312 addOperand(Operand: VF);
2313 }
2314
2315 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2316 VPValue *VF, const InductionDescriptor &IndDesc,
2317 TruncInst *Trunc, const VPIRFlags &Flags,
2318 DebugLoc DL)
2319 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2320 Start, Step, IndDesc, DL),
2321 VPIRFlags(Flags), Trunc(Trunc) {
2322 addOperand(Operand: VF);
2323 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
2324 (void)Metadata;
2325 if (Trunc)
2326 getMetadataToPropagate(Inst: Trunc, Metadata);
2327 assert(Metadata.empty() && "unexpected metadata on Trunc");
2328 }
2329
2330 ~VPWidenIntOrFpInductionRecipe() override = default;
2331
2332 VPWidenIntOrFpInductionRecipe *clone() override {
2333 return new VPWidenIntOrFpInductionRecipe(
2334 getPHINode(), getStartValue(), getStepValue(), getVFValue(),
2335 getInductionDescriptor(), Trunc, *this, getDebugLoc());
2336 }
2337
2338 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2339
2340 void execute(VPTransformState &State) override {
2341 llvm_unreachable("cannot execute this recipe, should be expanded via "
2342 "expandVPWidenIntOrFpInductionRecipe");
2343 }
2344
2345 /// Returns the start value of the induction.
2346 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2347
2348 /// If the recipe has been unrolled, return the VPValue for the induction
2349 /// increment, otherwise return null.
2350 VPValue *getSplatVFValue() const {
2351 return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr;
2352 }
2353
/// Returns the number of incoming values, which is also the number of
/// incoming blocks. Note that at the moment, VPWidenIntOrFpInductionRecipes
/// only have a single incoming value, their start value.
2357 unsigned getNumIncoming() const override { return 1; }
2358
/// Returns the first defined value as a TruncInst, if it is one, or nullptr
/// otherwise.
2361 TruncInst *getTruncInst() { return Trunc; }
2362 const TruncInst *getTruncInst() const { return Trunc; }
2363
2364 /// Returns true if the induction is canonical, i.e. starting at 0 and
2365 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2366 /// same type as the canonical induction.
2367 bool isCanonical() const;
2368
2369 /// Returns the scalar type of the induction.
2370 Type *getScalarType() const {
2371 return Trunc ? Trunc->getType() : getStartValue()->getType();
2372 }
2373
2374 /// Returns the VPValue representing the value of this induction at
2375 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2376 /// take place.
2377 VPValue *getLastUnrolledPartOperand() {
2378 return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this;
2379 }
2380
2381protected:
2382#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2383 /// Print the recipe.
2384 void printRecipe(raw_ostream &O, const Twine &Indent,
2385 VPSlotTracker &SlotTracker) const override;
2386#endif
2387};
2388
2389class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
2390public:
2391 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2392 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2393 /// VF*UF.
2394 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2395 VPValue *NumUnrolledElems,
2396 const InductionDescriptor &IndDesc, DebugLoc DL)
2397 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2398 Start, Step, IndDesc, DL) {
2399 addOperand(Operand: NumUnrolledElems);
2400 }
2401
2402 ~VPWidenPointerInductionRecipe() override = default;
2403
2404 VPWidenPointerInductionRecipe *clone() override {
2405 return new VPWidenPointerInductionRecipe(
2406 cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1),
2407 getOperand(N: 2), getInductionDescriptor(), getDebugLoc());
2408 }
2409
2410 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2411
2412 /// Generate vector values for the pointer induction.
2413 void execute(VPTransformState &State) override {
2414 llvm_unreachable("cannot execute this recipe, should be expanded via "
2415 "expandVPWidenPointerInduction");
2416 };
2417
2418 /// Returns true if only scalar values will be generated.
2419 bool onlyScalarsGenerated(bool IsScalable);
2420
2421protected:
2422#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2423 /// Print the recipe.
2424 void printRecipe(raw_ostream &O, const Twine &Indent,
2425 VPSlotTracker &SlotTracker) const override;
2426#endif
2427};
2428
2429/// A recipe for widened phis. Incoming values are operands of the recipe and
2430/// their operand index corresponds to the incoming predecessor block. If the
2431/// recipe is placed in an entry block to a (non-replicate) region, it must have
2432/// exactly 2 incoming values, the first from the predecessor of the region and
2433/// the second from the exiting block of the region.
2434class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
2435 public VPPhiAccessors {
2436 /// Name to use for the generated IR instruction for the widened phi.
2437 std::string Name;
2438
2439public:
2440 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2441 /// debug location \p DL.
2442 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2443 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2444 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2445 Name(Name.str()) {
2446 if (Start)
2447 addOperand(Operand: Start);
2448 }
2449
2450 VPWidenPHIRecipe *clone() override {
2451 auto *C = new VPWidenPHIRecipe(cast<PHINode>(Val: getUnderlyingValue()),
2452 getOperand(N: 0), getDebugLoc(), Name);
2453 for (VPValue *Op : llvm::drop_begin(RangeOrContainer: operands()))
2454 C->addOperand(Operand: Op);
2455 return C;
2456 }
2457
2458 ~VPWidenPHIRecipe() override = default;
2459
2460 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2461
2462 /// Generate the phi/select nodes.
2463 void execute(VPTransformState &State) override;
2464
2465 /// Return the cost of this VPWidenPHIRecipe.
2466 InstructionCost computeCost(ElementCount VF,
2467 VPCostContext &Ctx) const override;
2468
2469protected:
2470#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2471 /// Print the recipe.
2472 void printRecipe(raw_ostream &O, const Twine &Indent,
2473 VPSlotTracker &SlotTracker) const override;
2474#endif
2475
2476 const VPRecipeBase *getAsRecipe() const override { return this; }
2477};
2478
2479/// A recipe for handling first-order recurrence phis. The start value is the
2480/// first operand of the recipe and the incoming value from the backedge is the
2481/// second operand.
2482struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2483 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start,
2484 VPValue &BackedgeValue)
2485 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2486 &Start) {
2487 addOperand(Operand: &BackedgeValue);
2488 }
2489
2490 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2491
2492 VPFirstOrderRecurrencePHIRecipe *clone() override {
2493 return new VPFirstOrderRecurrencePHIRecipe(
2494 cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0), *getOperand(N: 1));
2495 }
2496
2497 void execute(VPTransformState &State) override;
2498
2499 /// Return the cost of this first-order recurrence phi recipe.
2500 InstructionCost computeCost(ElementCount VF,
2501 VPCostContext &Ctx) const override;
2502
2503 /// Returns true if the recipe only uses the first lane of operand \p Op.
2504 bool usesFirstLaneOnly(const VPValue *Op) const override {
2505 assert(is_contained(operands(), Op) &&
2506 "Op must be an operand of the recipe");
2507 return Op == getStartValue();
2508 }
2509
2510protected:
2511#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2512 /// Print the recipe.
2513 void printRecipe(raw_ostream &O, const Twine &Indent,
2514 VPSlotTracker &SlotTracker) const override;
2515#endif
2516};
2517
2518/// Possible variants of a reduction.
2519
2520/// This reduction is ordered and in-loop.
2521struct RdxOrdered {};
2522/// This reduction is in-loop.
2523struct RdxInLoop {};
2524/// This reduction is unordered with the partial result scaled down by some
2525/// factor.
2526struct RdxUnordered {
2527 unsigned VFScaleFactor;
2528};
2529using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2530
2531inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2532 unsigned ScaleFactor) {
2533 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2534 if (Ordered)
2535 return RdxOrdered{};
2536 if (InLoop)
2537 return RdxInLoop{};
return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2539}
2540
2541/// A recipe for handling reduction phis. The start value is the first operand
2542/// of the recipe and the incoming value from the backedge is the second
2543/// operand.
2544class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
2545 public VPUnrollPartAccessor<2> {
2546 /// The recurrence kind of the reduction.
2547 const RecurKind Kind;
2548
2549 ReductionStyle Style;
2550
2551 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2552 /// patterns for argmin/argmax).
2553 /// TODO: Also support cases where the phi itself has a single use, but its
2554 /// compare has multiple uses.
2555 bool HasUsesOutsideReductionChain;
2556
2557public:
2558 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2559 VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
2560 VPValue &BackedgeValue, ReductionStyle Style,
2561 bool HasUsesOutsideReductionChain = false)
2562 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2563 Kind(Kind), Style(Style),
2564 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2565 addOperand(Operand: &BackedgeValue);
2566 }
2567
2568 ~VPReductionPHIRecipe() override = default;
2569
2570 VPReductionPHIRecipe *clone() override {
2571 return new VPReductionPHIRecipe(
2572 dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), getRecurrenceKind(),
2573 *getOperand(N: 0), *getBackedgeValue(), Style,
2574 HasUsesOutsideReductionChain);
2575 }
2576
2577 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2578
2579 /// Generate the phi/select nodes.
2580 void execute(VPTransformState &State) override;
2581
2582 /// Get the factor that the VF of this recipe's output should be scaled by, or
2583 /// 1 if it isn't scaled.
2584 unsigned getVFScaleFactor() const {
2585 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
2586 return Partial ? Partial->VFScaleFactor : 1;
2587 }
2588
2589 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2590 /// > 1.
2591 void setVFScaleFactor(unsigned ScaleFactor) {
2592 assert(ScaleFactor > 1 && "must set to scale factor > 1");
Style = RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2594 }
2595
/// Returns the number of incoming values, which is also the number of
/// incoming blocks. A reduction phi always has two incoming values: its
/// start value and the incoming value from the loop backedge.
2599 unsigned getNumIncoming() const override { return 2; }
2600
2601 /// Returns the recurrence kind of the reduction.
2602 RecurKind getRecurrenceKind() const { return Kind; }
2603
/// Returns true if the phi is part of an ordered reduction.
2605 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); }
2606
2607 /// Returns true if the phi is part of an in-loop reduction.
2608 bool isInLoop() const {
2609 return std::holds_alternative<RdxInLoop>(v: Style) ||
2610 std::holds_alternative<RdxOrdered>(v: Style);
2611 }
2612
2613 /// Returns true if the reduction outputs a vector with a scaled down VF.
2614 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2615
/// Returns true if the phi is part of a multi-use reduction.
2617 bool hasUsesOutsideReductionChain() const {
2618 return HasUsesOutsideReductionChain;
2619 }
2620
2621 /// Returns true if the recipe only uses the first lane of operand \p Op.
2622 bool usesFirstLaneOnly(const VPValue *Op) const override {
2623 assert(is_contained(operands(), Op) &&
2624 "Op must be an operand of the recipe");
2625 return isOrdered() || isInLoop();
2626 }
2627
2628protected:
2629#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2630 /// Print the recipe.
2631 void printRecipe(raw_ostream &O, const Twine &Indent,
2632 VPSlotTracker &SlotTracker) const override;
2633#endif
2634};
2635
2636/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2637/// instructions.
2638class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
2639public:
2640 /// The blend operation is a User of the incoming values and of their
2641 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2642 /// be omitted (implied by passing an odd number of operands) in which case
2643 /// all other incoming values are merged into it.
2644 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
2645 : VPSingleDefRecipe(VPRecipeBase::VPBlendSC, Operands, Phi, DL) {
2646 assert(Operands.size() >= 2 && "Expected at least two operands!");
2647 }
2648
2649 VPBlendRecipe *clone() override {
2650 return new VPBlendRecipe(cast_or_null<PHINode>(Val: getUnderlyingValue()),
2651 operands(), getDebugLoc());
2652 }
2653
2654 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2655
2656 /// A normalized blend is one that has an odd number of operands, whereby the
2657 /// first operand does not have an associated mask.
2658 bool isNormalized() const { return getNumOperands() % 2; }
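
// Worked example (illustrative only): operands [I0, I1, M1, I2, M2] form a
// normalized blend (5 operands, odd): the incoming values are I0, I1, I2 and
// the masks are M1, M2, with I0 carrying no mask. Accordingly,
// getNumIncomingValues() == (5 + 1) / 2 == 3, getIncomingValue(1) == I1 and
// getMask(1) == M1.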
2659
2660 /// Return the number of incoming values, taking into account when normalized
2661 /// the first incoming value will have no mask.
2662 unsigned getNumIncomingValues() const {
2663 return (getNumOperands() + isNormalized()) / 2;
2664 }
2665
2666 /// Return incoming value number \p Idx.
2667 VPValue *getIncomingValue(unsigned Idx) const {
2668 return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized());
2669 }
2670
2671 /// Return mask number \p Idx.
2672 VPValue *getMask(unsigned Idx) const {
2673 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2674 return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized());
2675 }
2676
2677 /// Set mask number \p Idx to \p V.
2678 void setMask(unsigned Idx, VPValue *V) {
2679 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2680 Idx == 0 ? setOperand(I: 1, New: V) : setOperand(I: Idx * 2 + !isNormalized(), New: V);
2681 }
2682
2683 void execute(VPTransformState &State) override {
2684 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2685 }
2686
/// Return the cost of this VPBlendRecipe.
2688 InstructionCost computeCost(ElementCount VF,
2689 VPCostContext &Ctx) const override;
2690
2691 /// Returns true if the recipe only uses the first lane of operand \p Op.
2692 bool usesFirstLaneOnly(const VPValue *Op) const override {
2693 assert(is_contained(operands(), Op) &&
2694 "Op must be an operand of the recipe");
// Recursing through Blend recipes only; this must terminate at header phis
// at the latest.
2697 return all_of(Range: users(),
2698 P: [this](VPUser *U) { return U->usesFirstLaneOnly(Op: this); });
2699 }
2700
2701protected:
2702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2703 /// Print the recipe.
2704 void printRecipe(raw_ostream &O, const Twine &Indent,
2705 VPSlotTracker &SlotTracker) const override;
2706#endif
2707};
2708
2709/// A common base class for interleaved memory operations.
/// An interleaved memory operation is a memory access method that combines
2711/// multiple strided loads/stores into a single wide load/store with shuffles.
2712/// The first operand is the start address. The optional operands are, in order,
2713/// the stored values and the mask.
2714class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
2715 public VPIRMetadata {
2716 const InterleaveGroup<Instruction> *IG;
2717
2718 /// Indicates if the interleave group is in a conditional block and requires a
2719 /// mask.
2720 bool HasMask = false;
2721
2722 /// Indicates if gaps between members of the group need to be masked out or if
/// unused gaps can be loaded speculatively.
2724 bool NeedsMaskForGaps = false;
2725
2726protected:
2727 VPInterleaveBase(const unsigned char SC,
2728 const InterleaveGroup<Instruction> *IG,
2729 ArrayRef<VPValue *> Operands,
2730 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2731 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2732 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2733 NeedsMaskForGaps(NeedsMaskForGaps) {
2734 // TODO: extend the masked interleaved-group support to reversed access.
2735 assert((!Mask || !IG->isReverse()) &&
2736 "Reversed masked interleave-group not supported.");
2737 if (StoredValues.empty()) {
2738 for (unsigned I = 0; I < IG->getFactor(); ++I)
2739 if (Instruction *Inst = IG->getMember(Index: I)) {
2740 assert(!Inst->getType()->isVoidTy() && "must have result");
2741 new VPRecipeValue(this, Inst);
2742 }
2743 } else {
2744 for (auto *SV : StoredValues)
2745 addOperand(Operand: SV);
2746 }
2747 if (Mask) {
2748 HasMask = true;
2749 addOperand(Operand: Mask);
2750 }
2751 }
2752
2753public:
2754 VPInterleaveBase *clone() override = 0;
2755
2756 static inline bool classof(const VPRecipeBase *R) {
2757 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2758 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2759 }
2760
2761 static inline bool classof(const VPUser *U) {
2762 auto *R = dyn_cast<VPRecipeBase>(Val: U);
2763 return R && classof(R);
2764 }
2765
2766 /// Return the address accessed by this recipe.
2767 VPValue *getAddr() const {
2768 return getOperand(N: 0); // Address is the 1st, mandatory operand.
2769 }
2770
2771 /// Return the mask used by this recipe. Note that a full mask is represented
2772 /// by a nullptr.
2773 VPValue *getMask() const {
2774 // Mask is optional and the last operand.
2775 return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr;
2776 }
2777
2778 /// Return true if the access needs a mask because of the gaps.
2779 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2780
2781 const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; }
2782
2783 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2784
2785 void execute(VPTransformState &State) override {
2786 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2787 }
2788
2789 /// Return the cost of this recipe.
2790 InstructionCost computeCost(ElementCount VF,
2791 VPCostContext &Ctx) const override;
2792
2793 /// Returns true if the recipe only uses the first lane of operand \p Op.
2794 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2795
2796 /// Returns the number of stored operands of this interleave group. Returns 0
2797 /// for load interleave groups.
2798 virtual unsigned getNumStoreOperands() const = 0;
2799
2800 /// Return the VPValues stored by this interleave group. If it is a load
2801 /// interleave group, return an empty ArrayRef.
2802 ArrayRef<VPValue *> getStoredValues() const {
2803 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2804 getNumStoreOperands()};
2805 }
2806};
2807
/// VPInterleaveRecipe is a recipe for transforming an interleave group of loads
2809/// or stores into one wide load/store and shuffles. The first operand of a
2810/// VPInterleave recipe is the address, followed by the stored values, followed
2811/// by an optional mask.
2812class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2813public:
2814 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2815 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2816 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2817 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2818 Mask, NeedsMaskForGaps, MD, DL) {}
2819
2820 ~VPInterleaveRecipe() override = default;
2821
2822 VPInterleaveRecipe *clone() override {
2823 return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
2824 getStoredValues(), getMask(),
2825 needsMaskForGaps(), *this, getDebugLoc());
2826 }
2827
2828 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2829
2830 /// Generate the wide load or store, and shuffles.
2831 void execute(VPTransformState &State) override;
2832
2833 bool usesFirstLaneOnly(const VPValue *Op) const override {
2834 assert(is_contained(operands(), Op) &&
2835 "Op must be an operand of the recipe");
2836 return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op);
2837 }
2838
2839 unsigned getNumStoreOperands() const override {
2840 return getNumOperands() - (getMask() ? 2 : 1);
2841 }
2842
2843protected:
2844#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2845 /// Print the recipe.
2846 void printRecipe(raw_ostream &O, const Twine &Indent,
2847 VPSlotTracker &SlotTracker) const override;
2848#endif
2849};
2850
2851/// A recipe for interleaved memory operations with vector-predication
2852/// intrinsics. The first operand is the address, the second operand is the
2853/// explicit vector length. Stored values and mask are optional operands.
2854class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2855public:
2856 VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2857 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2858 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2859 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2860 R.getDebugLoc()) {
2861 assert(!getInterleaveGroup()->isReverse() &&
2862 "Reversed interleave-group with tail folding is not supported.");
2863 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2864 "supported for scalable vector.");
2865 }
2866
2867 ~VPInterleaveEVLRecipe() override = default;
2868
2869 VPInterleaveEVLRecipe *clone() override {
2870 llvm_unreachable("cloning not implemented yet");
2871 }
2872
2873 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
2874
2875 /// The VPValue of the explicit vector length.
2876 VPValue *getEVL() const { return getOperand(N: 1); }
2877
2878 /// Generate the wide load or store, and shuffles.
2879 void execute(VPTransformState &State) override;
2880
/// The recipe only uses the first lane of the address and EVL operands.
2882 bool usesFirstLaneOnly(const VPValue *Op) const override {
2883 assert(is_contained(operands(), Op) &&
2884 "Op must be an operand of the recipe");
2885 return (Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op)) ||
2886 Op == getEVL();
2887 }
2888
2889 unsigned getNumStoreOperands() const override {
2890 return getNumOperands() - (getMask() ? 3 : 2);
2891 }
2892
2893protected:
2894#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2895 /// Print the recipe.
2896 void printRecipe(raw_ostream &O, const Twine &Indent,
2897 VPSlotTracker &SlotTracker) const override;
2898#endif
2899};
2900
/// A recipe to represent in-loop, ordered or partial reduction operations. It
2902/// performs a reduction on a vector operand into a scalar (vector in the case
2903/// of a partial reduction) value, and adds the result to a chain. The Operands
2904/// are {ChainOp, VecOp, [Condition]}.
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
  /// The recurrence kind for the reduction in question.
  RecurKind RdxKind;
  /// Whether the reduction is conditional.
  bool IsConditional = false;
  ReductionStyle Style;

protected:
  VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
                    FastMathFlags FMFs, Instruction *I,
                    ArrayRef<VPValue *> Operands, VPValue *CondOp,
                    ReductionStyle Style, DebugLoc DL)
      : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
        Style(Style) {
    if (CondOp) {
      IsConditional = true;
      addOperand(CondOp);
    }
    setUnderlyingValue(I);
  }

public:
  VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
      : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
                          {ChainOp, VecOp}, CondOp, Style, DL) {}

  VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
      : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
                          {ChainOp, VecOp}, CondOp, Style, DL) {}

  ~VPReductionRecipe() override = default;

  VPReductionRecipe *clone() override {
    return new VPReductionRecipe(RdxKind, getFastMathFlags(),
                                 getUnderlyingInstr(), getChainOp(), getVecOp(),
                                 getCondOp(), Style, getDebugLoc());
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  static inline bool classof(const VPValue *VPV) {
    const VPRecipeBase *R = VPV->getDefiningRecipe();
    return R && classof(R);
  }

  static inline bool classof(const VPSingleDefRecipe *R) {
    return classof(static_cast<const VPRecipeBase *>(R));
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// Return the cost of VPReductionRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Return the recurrence kind for the in-loop reduction.
  RecurKind getRecurrenceKind() const { return RdxKind; }
  /// Return true if the in-loop reduction is ordered.
  bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
  /// Return true if the in-loop reduction is conditional.
  bool isConditional() const { return IsConditional; }
  /// Returns true if the reduction outputs a vector with a scaled down VF.
  bool isPartialReduction() const { return getVFScaleFactor() > 1; }
  /// Returns true if the reduction is in-loop.
  bool isInLoop() const {
    return std::holds_alternative<RdxInLoop>(Style) ||
           std::holds_alternative<RdxOrdered>(Style);
  }
  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
  }
  /// Get the factor that the VF of this recipe's output should be scaled by,
  /// or 1 if it isn't scaled.
  unsigned getVFScaleFactor() const {
    auto *Partial = std::get_if<RdxUnordered>(&Style);
    return Partial ? Partial->VFScaleFactor : 1;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to represent inloop reduction operations with vector-predication
/// intrinsics, performing a reduction on a vector operand with the explicit
/// vector length (EVL) into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
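/// For example (illustrative sketch), this may lower to:
///   %r = call i32 @llvm.vp.reduce.add.nxv4i32(i32 %chain,
///            <vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask, i32 %evl)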
class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
public:
  VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
                       DebugLoc DL = DebugLoc::getUnknown())
      : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
                          R.getFastMathFlags(),
                          cast_or_null<Instruction>(R.getUnderlyingValue()),
                          {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
                          getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
                          DL) {}

  ~VPReductionEVLRecipe() override = default;

  VPReductionEVLRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

  /// The VPValue of the explicit vector length.
  VPValue *getEVL() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getEVL();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// produce a single scalar, only one copy, for lane zero, will be generated.
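/// For example (illustrative), replicating %d = sdiv i32 %a, %b for VF = 4
/// emits four scalar sdiv instructions, one per lane, instead of a single
/// <4 x i32> division; with IsSingleScalar set, only the lane-0 copy is
/// emitted.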
class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
                                            public VPIRMetadata {
  /// Indicator if only a single replica per lane is needed.
  bool IsSingleScalar;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
                    bool IsSingleScalar, VPValue *Mask = nullptr,
                    const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
                    DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
        IsPredicated(Mask) {
    setUnderlyingValue(I);
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VPReplicateRecipe *clone() override {
    auto *Copy = new VPReplicateRecipe(
        getUnderlyingInstr(), operands(), IsSingleScalar,
        isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
    Copy->transferFlags(*this);
    return Copy;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPReplicateRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  bool isSingleScalar() const { return IsSingleScalar; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isSingleScalar();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }

  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating conditional branches on the bits of a mask.
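/// For example (illustrative sketch), for the lane being processed this may
/// lower to:
///   %c = extractelement <4 x i1> %mask, i32 %lane
///   br i1 %c, label %pred.bb, label %continue.bb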
class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
      : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}

  VPBranchOnMaskRecipe *clone() override {
    return new VPBranchOnMaskRecipe(getOperand(0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPBranchOnMaskRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    printOperands(O, SlotTracker);
  }
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe to combine multiple recipes into a single 'expression' recipe,
/// which should be considered a single entity for cost-modeling and transforms.
/// The recipe needs to be 'decomposed', i.e. replaced by its individual
/// expression recipes, before executing the plan. The individual expression
/// recipes are completely disconnected from the def-use graph of other recipes
/// not part of the expression. Def-use edges between pairs of expression
/// recipes remain intact, whereas every edge between an expression recipe and
/// a recipe outside the expression is redirected to connect the non-expression
/// recipe with the VPExpressionRecipe itself.
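/// For example (illustrative), an ExtMulAccReduction may bundle the chain
///   ext(%a), ext(%b), mul, reduce.add into %chain
/// into one VPExpressionRecipe whose external operands are %a, %b and the
/// reduction chain.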
class VPExpressionRecipe : public VPSingleDefRecipe {
  /// Recipes included in this VPExpressionRecipe. This could contain
  /// duplicates.
  SmallVector<VPSingleDefRecipe *> ExpressionRecipes;

  /// Temporary VPValues used for external operands of the expression, i.e.
  /// operands not defined by recipes in the expression.
  SmallVector<VPValue *> LiveInPlaceholders;

  enum class ExpressionTypes {
    /// Represents an inloop extended reduction operation, performing a
    /// reduction on an extended vector operand into a scalar value, and adding
    /// the result to a chain.
    ExtendedReduction,
    /// Represent an inloop multiply-accumulate reduction, multiplying the
    /// extended vector operands, performing a reduction.add on the result, and
    /// adding the scalar result to a chain.
    ExtMulAccReduction,
    /// Represent an inloop multiply-accumulate reduction, multiplying the
    /// vector operands, performing a reduction.add on the result, and adding
    /// the scalar result to a chain.
    MulAccReduction,
    /// Represent an inloop multiply-accumulate reduction, multiplying the
    /// extended vector operands, negating the multiplication, performing a
    /// reduction.add on the result, and adding the scalar result to a chain.
    ExtNegatedMulAccReduction,
  };

  /// Type of the expression.
  ExpressionTypes ExpressionType;

  /// Construct a new VPExpressionRecipe by internalizing recipes in \p
  /// ExpressionRecipes. External operands (i.e. not defined by another recipe
  /// in the expression) are replaced by temporary VPValues and the original
  /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
  /// as needed (excluding the last) to ensure they are only used by other
  /// recipes in the expression.
  VPExpressionRecipe(ExpressionTypes ExpressionType,
                     ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);

public:
  VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
      : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
  VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
      : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
  VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
                     VPWidenRecipe *Mul, VPReductionRecipe *Red)
      : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
                           {Ext0, Ext1, Mul, Red}) {}
  VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
                     VPWidenRecipe *Mul, VPWidenRecipe *Sub,
                     VPReductionRecipe *Red)
      : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
                           {Ext0, Ext1, Mul, Sub, Red}) {
    assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
    assert(Red->getRecurrenceKind() == RecurKind::Add &&
           "Expected an add reduction");
    assert(getNumOperands() >= 3 && "Expected at least three operands");
    [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
    assert(SubConst && SubConst->isZero() &&
           Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
  }

  ~VPExpressionRecipe() override {
    SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
    for (auto *R : reverse(ExpressionRecipes)) {
      if (ExpressionRecipesSeen.insert(R).second)
        delete R;
    }
    for (VPValue *T : LiveInPlaceholders)
      delete T;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)

  VPExpressionRecipe *clone() override {
    assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
    SmallVector<VPSingleDefRecipe *> NewExpressionRecipes;
    for (auto *R : ExpressionRecipes)
      NewExpressionRecipes.push_back(R->clone());
    for (auto *New : NewExpressionRecipes) {
      for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
        New->replaceUsesOfWith(Old, NewExpressionRecipes[Idx]);
      // Update placeholder operands in the cloned recipe to use the external
      // operands, to be internalized when the cloned expression is constructed.
      for (const auto &[Placeholder, OutsideOp] :
           zip(LiveInPlaceholders, operands()))
        New->replaceUsesOfWith(Placeholder, OutsideOp);
    }
    return new VPExpressionRecipe(ExpressionType, NewExpressionRecipes);
  }

  /// Return the VPValue to use to infer the result type of the recipe.
  VPValue *getOperandOfResultType() const {
    unsigned OpIdx =
        cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
                                                                           : 1;
    return getOperand(getNumOperands() - OpIdx);
  }

  /// Insert the recipes of the expression back into the VPlan, directly before
  /// the current recipe. Leaves the expression recipe empty, which must be
  /// removed before codegen.
  void decompose();

  unsigned getVFScaleFactor() const {
    auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
    return PR ? PR->getVFScaleFactor() : 1;
  }

  /// Method for generating code, must not be called as this recipe is
  /// abstract.
  void execute(VPTransformState &State) override {
    llvm_unreachable("recipe must be removed before execute");
  }

  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Returns true if this expression contains recipes that may read from or
  /// write to memory.
  bool mayReadOrWriteMemory() const;

  /// Returns true if this expression contains recipes that may have side
  /// effects.
  bool mayHaveSideEffects() const;

  /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
  bool isSingleScalar() const;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
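/// For example (illustrative sketch), a value %v defined only when the mask
/// bit is set may be merged at the join point via a phi such as
///   %p = phi [ %v, %pred.bb ], [ %prev, %entry.bb ]
/// so that %v's users outside the predicated block remain in SSA form.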
class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
      : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
  ~VPPredInstPHIRecipe() override = default;

  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)

  /// Generates phi nodes for live-outs (from a replicate region) as needed to
  /// retain SSA form.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPPredInstPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
                                              public VPIRMetadata {
protected:
  Instruction &Ingredient;

  /// Alignment information for this memory access.
  Align Alignment;

  /// Whether the accessed addresses are consecutive.
  bool Consecutive;

  /// Whether the consecutive accessed addresses are in reverse order.
  bool Reverse;

  /// Whether the memory access is masked.
  bool IsMasked = false;

  void setMask(VPValue *Mask) {
    assert(!IsMasked && "cannot re-set mask");
    if (!Mask)
      return;
    addOperand(Mask);
    IsMasked = true;
  }

  VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                      std::initializer_list<VPValue *> Operands,
                      bool Consecutive, bool Reverse,
                      const VPIRMetadata &Metadata, DebugLoc DL)
      : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
        Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
        Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
    assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
           "Reversed access without VPVectorEndPointerRecipe address?");
  }

public:
  VPWidenMemoryRecipe *clone() override {
    llvm_unreachable("cloning not supported");
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  /// Return whether the consecutive loaded/stored addresses are in reverse
  /// order.
  bool isReverse() const { return Reverse; }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const { return getOperand(0); }

  /// Returns true if the recipe is masked.
  bool isMasked() const { return IsMasked; }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Returns the alignment of the memory access.
  Align getAlign() const { return Alignment; }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override {
    llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
  }

  /// Return the cost of this VPWidenMemoryRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  Instruction &getIngredient() const { return Ingredient; }
};

/// A recipe for widening load operations, using the address to load from and
/// an optional mask.
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
                                                   public VPRecipeValue {
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse,
                    const VPIRMetadata &Metadata, DebugLoc DL)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
                            Consecutive, Reverse, Metadata, DL),
        VPRecipeValue(this, &Load) {
    setMask(Mask);
  }

  VPWidenLoadRecipe *clone() override {
    return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
                                 getMask(), Consecutive, Reverse, *this,
                                 getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)

  /// Generate a wide load or gather.
  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive load operations only demand the first lane of
    // their address.
    return Op == getAddr() && isConsecutive();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
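/// For example (illustrative), a consecutive masked load may lower to:
///   %w = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %addr,
///            <vscale x 4 x i1> %mask, i32 %evl)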
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe,
                                    public VPRecipeValue {
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
                       VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
                            L.getDebugLoc()),
        VPRecipeValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(1); }

  /// Generate the wide load or gather.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenLoadEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
                     VPValue *Mask, bool Consecutive, bool Reverse,
                     const VPIRMetadata &Metadata, DebugLoc DL)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
                            {Addr, StoredVal}, Consecutive, Reverse, Metadata,
                            DL) {
    setMask(Mask);
  }

  VPWidenStoreRecipe *clone() override {
    return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                  getStoredValue(), getMask(), Consecutive,
                                  Reverse, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Generate a wide store or scatter.
  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive stores only demand the first lane of their address,
    // unless the same operand is also stored.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
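/// For example (illustrative), a consecutive masked store may lower to:
///   call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %val, ptr %addr,
///                                       <vscale x 4 x i1> %mask, i32 %evl)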
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
                            {Addr, StoredVal, &EVL}, S.isConsecutive(),
                            S.isReverse(), S, S.getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(2); }

  /// Generate the wide store or scatter.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenStoreEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. The latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};

/// Recipe to expand a SCEV expression.
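/// For example (illustrative), a SCEV expression such as (4 * %n) is
/// materialized as scalar IR, e.g. %0 = shl i64 %n, 2, in the entry block
/// before the plan executes.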
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;

public:
  VPExpandSCEVRecipe(const SCEV *Expr)
      : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}

  ~VPExpandSCEVRecipe() override = default;

  VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }

  VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("SCEV expressions must be expanded before final execute");
  }

  /// Return the cost of this VPExpandSCEVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  const SCEV *getSCEV() const { return Expr; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// Canonical scalar induction phi of the vector loop, starting at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
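/// For example (illustrative), it corresponds to scalar output IR such as:
///   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]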
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
                          DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getStartValue(), getDebugLoc());
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Returns the start value of the canonical induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(getOperand(0)); }

  /// Returns the scalar type of the induction.
  Type *getScalarType() const { return getStartValue()->getType(); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPCanonicalIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
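/// For example (illustrative), the incoming masks are typically produced by
/// @llvm.get.active.lane.mask and carried across iterations by this phi.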
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
                          StartMask, DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VPActiveLaneMaskPHIRecipe *clone() override {
    auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
    if (getNumOperands() == 2)
      R->addOperand(getOperand(1));
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating the phi node for the current index of elements,
/// adjusted in accordance with the EVL value. It starts at the start value of
/// the canonical induction and gets incremented by EVL in each iteration of
/// the vector loop.
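/// For example (illustrative sketch):
///   %evl.iv = phi i64 [ %start, %vector.ph ], [ %evl.iv.next, %vector.body ]
///   %evl.iv.next = add i64 %evl.iv, %evl   ; %evl is this iteration's EVL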
class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPEVLBasedIVPHISC, nullptr, StartIV,
                          DL) {}

  ~VPEVLBasedIVPHIRecipe() override = default;

  VPEVLBasedIVPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPEVLBasedIVPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Return the cost of this VPEVLBasedIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                 public VPUnrollPartAccessor<1> {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCanonicalIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for converting the input \p IV value to the corresponding value of
/// an IV with different start and step values, using Start + IV * Step.
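/// For example (illustrative), with Start = 7 and Step = 3, the canonical IV
/// value 5 is converted to 7 + 5 * 3 = 22.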
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

  /// Name to use for the generated IR instruction for the derived IV.
  std::string Name;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    const Twine &Name = "")
      : VPDerivedIVRecipe(
            IndDesc.getKind(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
            Start, CanonicalIV, Step, Name) {}

  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
                    const FPMathOperator *FPBinOp, VPIRValue *Start,
                    VPValue *IV, VPValue *Step, const Twine &Name = "")
      : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
        Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}

  ~VPDerivedIVRecipe() override = default;

  VPDerivedIVRecipe *clone() override {
    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
                                 getStepValue());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue
  /// (1st operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPDerivedIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  Type *getScalarType() const { return getStartValue()->getType(); }

  VPIRValue *getStartValue() const { return cast<VPIRValue>(getOperand(0)); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values. Before unrolling by UF the recipe represents
/// the VF*UF scalar values to be produced, or UF scalar values if only the
/// first lane is used, and has 3 operands: IV, step and VF. Unrolling adds one
/// extra operand StartIndex to all unroll parts except part 0, as the recipe
/// represents the VF scalar values (this number of values is taken from
/// State.VF rather than from the VF operand) starting at IV + StartIndex.
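/// For example (illustrative), for an integer IV with step 2 and VF = 4, part
/// 0 produces the scalar values IV + 0*2, IV + 1*2, IV + 2*2 and IV + 3*2.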
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs,
                        DebugLoc DL)
      : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
                            FMFs, DL),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step, VPValue *VF,
                        DebugLoc DL = DebugLoc::getUnknown())
      : VPScalarIVStepsRecipe(
            IV, Step, VF, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags(),
            DL) {}

  ~VPScalarIVStepsRecipe() override = default;

  VPScalarIVStepsRecipe *clone() override {
    return new VPScalarIVStepsRecipe(
        getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
        hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
        getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by its users.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPScalarIVStepsRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  VPValue *getStepValue() const { return getOperand(1); }

  /// Return the number of scalars to produce per unroll part, used to compute
  /// StartIndex during unrolling.
  VPValue *getVFValue() const { return getOperand(2); }

  /// Return the StartIndex, or null if known to be zero, valid only after
  /// unrolling.
  VPValue *getStartIndex() const {
    return getNumOperands() == 4 ? getOperand(3) : nullptr;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};

/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
/// types implementing VPPhiAccessors. Used by isa<> & co.
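/// For example, this enables `isa<VPPhiAccessors>(R)` and
/// `dyn_cast<VPPhiAccessors>(R)` on a `const VPRecipeBase *R`.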
template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
  static inline bool isPossible(const VPRecipeBase *f) {
    // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
    return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(f);
  }
};
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
template <typename SrcTy>
struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {

  using Self = CastInfo<VPPhiAccessors, SrcTy>;

  /// doCast is used by cast<>.
  static inline VPPhiAccessors *doCast(SrcTy R) {
    return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
      switch (R->getVPRecipeID()) {
      case VPRecipeBase::VPInstructionSC:
        return cast<VPPhi>(R);
      case VPRecipeBase::VPIRInstructionSC:
        return cast<VPIRPhi>(R);
      case VPRecipeBase::VPWidenPHISC:
        return cast<VPWidenPHIRecipe>(R);
      default:
        return cast<VPHeaderPHIRecipe>(R);
      }
    }());
  }

  /// doCastIfPossible is used by dyn_cast<>.
  static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
    if (!Self::isPossible(f))
      return nullptr;
    return doCast(f);
  }
};
template <>
struct CastInfo<VPPhiAccessors, VPRecipeBase *>
    : CastInfoVPPhiAccessors<VPRecipeBase *> {};
template <>
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
    : CastInfoVPPhiAccessors<const VPRecipeBase *> {};

/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
namespace detail {
template <typename DstTy, typename RecipeBasePtrTy>
static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
  switch (R->getVPRecipeID()) {
  case VPRecipeBase::VPInstructionSC:
    return cast<VPInstruction>(R);
  case VPRecipeBase::VPWidenSC:
    return cast<VPWidenRecipe>(R);
  case VPRecipeBase::VPWidenCastSC:
    return cast<VPWidenCastRecipe>(R);
  case VPRecipeBase::VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(R);
  case VPRecipeBase::VPWidenCallSC:
    return cast<VPWidenCallRecipe>(R);
  case VPRecipeBase::VPReplicateSC:
    return cast<VPReplicateRecipe>(R);
  case VPRecipeBase::VPInterleaveSC:
  case VPRecipeBase::VPInterleaveEVLSC:
    return cast<VPInterleaveBase>(R);
  case VPRecipeBase::VPWidenLoadSC:
  case VPRecipeBase::VPWidenLoadEVLSC:
  case VPRecipeBase::VPWidenStoreSC:
  case VPRecipeBase::VPWidenStoreEVLSC:
    return cast<VPWidenMemoryRecipe>(R);
  default:
    llvm_unreachable("invalid recipe for VPIRMetadata cast");
  }
}
} // namespace detail

/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
template <typename DstTy, typename SrcTy>
struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
  static inline bool isPossible(SrcTy R) {
    // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
    // also handled in castToVPIRMetadata.
    return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
               VPWidenIntrinsicRecipe, VPWidenCallRecipe, VPReplicateRecipe,
               VPInterleaveRecipe, VPInterleaveEVLRecipe, VPWidenLoadRecipe,
               VPWidenLoadEVLRecipe, VPWidenStoreRecipe, VPWidenStoreEVLRecipe>(
        R);
  }

  using RetTy = DstTy *;

  /// doCast is used by cast<>.
  static inline RetTy doCast(SrcTy R) {
    return detail::castToVPIRMetadata<RetTy, SrcTy>(R);
  }

  /// doCastIfPossible is used by dyn_cast<>.
  static inline RetTy doCastIfPossible(SrcTy R) {
    if (!isPossible(R))
      return nullptr;
    return doCast(R);
  }
};
template <>
struct CastInfo<VPIRMetadata, VPRecipeBase *>
    : CastInfoVPIRMetadata<VPIRMetadata, VPRecipeBase *> {};
template <>
struct CastInfo<VPIRMetadata, const VPRecipeBase *>
    : CastInfoVPIRMetadata<const VPIRMetadata, const VPRecipeBase *> {};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
  friend class VPlan;

  /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

public:
  using RecipeListTy = iplist<VPRecipeBase>;

protected:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
      : VPBlockBase(BlockSC, Name.str()) {}

public:
  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to append.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this VPBasicBlock.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();
  const VPRegionBlock *getEnclosingLoopRegion() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe
  /// terminating the block. If there is no successor or only a single one,
  /// return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

  /// Clone the current block and its recipes, without updating the operands of
  /// the cloned recipes.
  VPBasicBlock *clone() override;

  /// Returns the predecessor block at index \p Idx with the predecessors as per
  /// the corresponding plain CFG. If the block is an entry block to a region,
  /// the first predecessor is the single predecessor of a region, and the
  /// second predecessor is the exiting block of the region.
  const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;

protected:
  /// Execute the recipes in the IR basic block \p BB.
  void executeRecipes(VPTransformState *State, BasicBlock *BB);

  /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
  /// generated for this VPBB.
  void connectToPredecessors(VPTransformState &State);

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState &State);
};

inline const VPBasicBlock *
VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
  return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}

/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {
  friend class VPlan;

  BasicBlock *IRBB;

  /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
  VPIRBasicBlock(BasicBlock *IRBB)
      : VPBasicBlock(VPIRBasicBlockSC,
                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
        IRBB(IRBB) {}

public:
  ~VPIRBasicBlock() override = default;

  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  VPIRBasicBlock *clone() override;

  BasicBlock *getIRBasicBlock() const { return IRBB; }
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
  friend class VPlan;

  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

  /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

public:
  ~VPRegionBlock() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  // Return the cost of this region.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif

  /// Clone all blocks in the single-entry single-exit region of the block and
  /// their recipes without updating the operands of the cloned recipes.
  VPRegionBlock *clone() override;

  /// Remove the current region from its VPlan, connecting its predecessor to
  /// its entry, and its exiting block to its successor.
  void dissolveToCFGLoop();

  /// Returns the canonical induction recipe of the region.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path. TODO: Unify both code paths.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }
  const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
    return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
  }

  /// Return the type of the canonical IV for loop regions.
  Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
  const Type *getCanonicalIVType() const {
    return getCanonicalIV()->getScalarType();
  }
};
4403
4404inline VPRegionBlock *VPRecipeBase::getRegion() {
4405 return getParent()->getParent();
4406}
4407
4408inline const VPRegionBlock *VPRecipeBase::getRegion() const {
4409 return getParent()->getParent();
4410}

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and
  /// the rest of VPlan execution.
  /// When this VPlan is used for the epilogue vector loop, the entry will be
  /// replaced by a new entry block created during skeleton creation.
  VPBasicBlock *Entry;

  /// VPIRBasicBlock wrapping the header of the original scalar loop.
  VPIRBasicBlock *ScalarHeader;

  /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the
  /// original scalar loop. Note that some exit blocks may be unreachable at
  /// the moment, e.g. if the scalar epilogue always executes.
  SmallVector<VPIRBasicBlock *, 2> ExitBlocks;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPSymbolicValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPSymbolicValue VectorTripCount;

  /// Represents the vectorization factor of the loop.
  VPSymbolicValue VF;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPSymbolicValue VFxUF;

  /// Contains all the external definitions created for this VPlan, as a
  /// mapping from IR Values to VPIRValues.
  SmallMapVector<Value *, VPIRValue *, 16> LiveIns;

  /// Blocks allocated and owned by the VPlan. They will be deleted once the
  /// VPlan is destroyed.
  SmallVector<VPBlockBase *> CreatedBlocks;

  /// Construct a VPlan with \p Entry as its entry block and with
  /// \p ScalarHeader wrapping the original header of the scalar loop.
  VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
      : Entry(Entry), ScalarHeader(ScalarHeader) {
    Entry->setPlan(this);
    assert(ScalarHeader->getNumSuccessors() == 0 &&
           "scalar header must be a leaf node");
  }

public:
  /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
  /// original preheader and scalar header of \p L, to be used as entry and
  /// scalar header blocks of the new VPlan.
  VPlan(Loop *L);

  /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
  /// wrapping \p ScalarHeaderBB.
  VPlan(BasicBlock *ScalarHeaderBB) {
    setEntry(createVPBasicBlock("preheader"));
    ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
  }

  LLVM_ABI_FOR_TEST ~VPlan();

  void setEntry(VPBasicBlock *VPBB) {
    Entry = VPBB;
    VPBB->setPlan(this);
  }

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  /// Return the cost of this plan.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// Returns the preheader of the vector loop region, if one exists, or null
  /// otherwise.
  VPBasicBlock *getVectorPreheader() {
    VPRegionBlock *VectorRegion = getVectorLoopRegion();
    return VectorRegion
               ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
               : nullptr;
  }

  /// Returns the VPRegionBlock of the vector loop.
  LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
  LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;

  /// Returns the 'middle' block of the plan, that is the block that selects
  /// whether to execute the scalar tail loop or the exit block from the loop
  /// latch. If there is an early exit from the vector loop, the middle block
  /// conceptually has the early exit block as a third successor, split across
  /// two VPBBs. In that case, the second VPBB selects whether to execute the
  /// scalar tail loop or the exit block. If the scalar tail loop or exit
  /// block are known to always execute, the middle block may branch directly
  /// to that block. This function cannot be called once the vector loop
  /// region has been removed.
  VPBasicBlock *getMiddleBlock() {
    VPRegionBlock *LoopRegion = getVectorLoopRegion();
    assert(
        LoopRegion &&
        "cannot call the function after vector loop region has been removed");
    // The middle block is always the last successor of the region.
    return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
  }

  const VPBasicBlock *getMiddleBlock() const {
    return const_cast<VPlan *>(this)->getMiddleBlock();
  }

  /// Return the VPBasicBlock for the preheader of the scalar loop.
  VPBasicBlock *getScalarPreheader() const {
    return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
  }

  /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
  VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }

  /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks
  /// of the original scalar loop.
  ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }

  /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
  /// exit block.
  VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const;

  /// Returns true if \p VPBB is an exit block.
  bool isExitBlock(VPBlockBase *VPBB);

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// Set the trip count, which is assumed to be currently null; if it is not,
  /// use resetTripCount() instead.
  void setTripCount(VPValue *NewTripCount) {
    assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
    TripCount = NewTripCount;
  }

  /// Resets the trip count for the VPlan. The caller must make sure all uses
  /// of the original trip count have been replaced.
  void resetTripCount(VPValue *NewTripCount) {
    assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
           "TripCount must be set when resetting");
    TripCount = NewTripCount;
  }
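
  // Illustrative sketch of the intended protocol (`Plan`, `TC`, and `NewTC`
  // are assumed names; rewiring the users of the old value is shown as a
  // hypothetical step that must happen before resetting):
  //
  //   Plan.setTripCount(TC);         // Trip count must currently be unset.
  //   TC->replaceAllUsesWith(NewTC); // Rewire all users of the old value.
  //   Plan.resetTripCount(NewTC);    // Old value must have no users left.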

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPSymbolicValue();
    return BackedgeTakenCount;
  }
  VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }

  /// The vector trip count.
  VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns the VF of the vector loop region.
  VPValue &getVF() { return VF; }
  const VPValue &getVF() const { return VF; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  LLVMContext &getContext() const {
    return getScalarHeader()->getIRBasicBlock()->getContext();
  }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) const { return VFs.count(VF); }
  bool hasScalableVF() const {
    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
  }

  /// Returns an iterator range over all VFs of the plan.
  iterator_range<SmallSetVector<ElementCount, 2>::iterator>
  vectorFactors() const {
    return VFs;
  }

  bool hasScalarVFOnly() const {
    bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
    assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
           "Plan with scalar VF should only have a single VF");
    return HasScalarVFOnly;
  }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  unsigned getUF() const {
    assert(UFs.size() == 1 && "Expected a single UF");
    return UFs[0];
  }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set a UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Returns true if the VPlan has already been unrolled, i.e. it has a
  /// single concrete UF.
  bool isUnrolled() const { return UFs.size() == 1; }
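
  // Illustrative sketch: a plan starts with a set of candidate VFs/UFs and is
  // later narrowed down to the single chosen configuration (the concrete
  // values here are assumed):
  //
  //   Plan.addVF(ElementCount::getFixed(4));
  //   Plan.addVF(ElementCount::getFixed(8));
  //   Plan.setVF(ElementCount::getFixed(8)); // Keep only the chosen VF.
  //   Plan.setUF(2); // Valid while UFs is empty; the plan is now unrolled.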

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
  /// yet) for \p V.
  VPIRValue *getOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPIRValue of a null Value");
    auto [It, Inserted] = LiveIns.try_emplace(V);
    if (Inserted) {
      if (auto *CI = dyn_cast<ConstantInt>(V))
        It->second = new VPConstantInt(CI);
      else
        It->second = new VPIRValue(V);
    }

    assert(isa<VPIRValue>(It->second) &&
           "Only VPIRValues should be in mapping");
    return It->second;
  }
  VPIRValue *getOrAddLiveIn(VPIRValue *V) {
    assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
    return getOrAddLiveIn(V->getValue());
  }
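
  // Illustrative sketch (`Plan` and IR value `V` are assumed names): live-ins
  // are uniqued per IR Value, so repeated calls return the same VPIRValue.
  //
  //   VPIRValue *A = Plan.getOrAddLiveIn(V);
  //   VPIRValue *B = Plan.getOrAddLiveIn(V);
  //   assert(A == B && "live-ins are uniqued per IR Value");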

  /// Return a VPIRValue wrapping i1 true.
  VPIRValue *getTrue() { return getConstantInt(1, 1); }

  /// Return a VPIRValue wrapping i1 false.
  VPIRValue *getFalse() { return getConstantInt(1, 0); }

  /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
  VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
    return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
  }

  /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
  /// value.
  VPIRValue *getConstantInt(unsigned BitWidth, uint64_t Val,
                            bool IsSigned = false) {
    return getConstantInt(APInt(BitWidth, Val, IsSigned));
  }

  /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
  VPIRValue *getConstantInt(const APInt &Val) {
    return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
  }
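
  // Illustrative sketch: these helpers all funnel into getOrAddLiveIn, and
  // ConstantInt::get uniques constants, so equal constants share a VPIRValue
  // (the names here are assumed):
  //
  //   VPIRValue *True = Plan.getTrue();             // i1 1
  //   VPIRValue *C42 = Plan.getConstantInt(32, 42); // i32 42
  //   assert(C42 == Plan.getConstantInt(APInt(32, 42)));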

  /// Return the live-in VPIRValue for \p V, if there is one, or nullptr
  /// otherwise.
  VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }

  /// Return the list of live-in VPIRValues available in the VPlan.
  auto getLiveIns() const { return LiveIns.values(); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
  /// recipes to refer to the clones, and return it.
  LLVM_ABI_FOR_TEST VPlan *duplicate();

  /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
  /// present. The returned block is owned by the VPlan and deleted once the
  /// VPlan is destroyed.
  VPBasicBlock *createVPBasicBlock(const Twine &Name,
                                   VPRecipeBase *Recipe = nullptr) {
    auto *VPB = new VPBasicBlock(Name, Recipe);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }

  /// Create a new loop region with \p Name and entry and exiting blocks set
  /// to \p Entry and \p Exiting respectively, if set. The returned block is
  /// owned by the VPlan and deleted once the VPlan is destroyed.
  VPRegionBlock *createLoopRegion(const std::string &Name = "",
                                  VPBlockBase *Entry = nullptr,
                                  VPBlockBase *Exiting = nullptr) {
    auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
                      : new VPRegionBlock(Name);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }

  /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
  /// returned block is owned by the VPlan and deleted once the VPlan is
  /// destroyed.
  VPRegionBlock *createReplicateRegion(VPBlockBase *Entry,
                                       VPBlockBase *Exiting,
                                       const std::string &Name = "") {
    auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
    CreatedBlocks.push_back(VPB);
    return VPB;
  }
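
  // Illustrative sketch (block names assumed): blocks and regions are created
  // through the plan so that their ownership and destruction are tied to it.
  // A single-block loop region uses the same block as entry and exiting:
  //
  //   VPBasicBlock *Body = Plan.createVPBasicBlock("loop.body");
  //   VPRegionBlock *Loop = Plan.createLoopRegion("loop", Body, Body);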

  /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
  /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
  /// block is owned by the VPlan and deleted once the VPlan is destroyed.
  VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);

  /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
  /// instructions in \p IRBB, except its terminator which is managed by the
  /// successors of the block in VPlan. The returned block is owned by the
  /// VPlan and deleted once the VPlan is destroyed.
  LLVM_ABI_FOR_TEST VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);

  /// Returns true if the VPlan is based on a loop with an early exit. That is
  /// the case if the VPlan has more than one exit block with predecessors, or
  /// a single exit block with multiple predecessors (one for the exit via the
  /// latch and one via the early exit).
  bool hasEarlyExit() const {
    return count_if(ExitBlocks,
                    [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
               1 ||
           (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
  }

  /// Returns true if the scalar tail may execute after the vector loop. Note
  /// that this relies on unneeded branches to the scalar tail loop being
  /// removed.
  bool hasScalarTail() const {
    return getScalarPreheader()->hasPredecessors() &&
           getScalarPreheader()->getSinglePredecessor() != getEntry();
  }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif
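
// Illustrative sketch: with the streaming operator above, a plan can be
// printed directly in debug output, e.g.:
//
//   LLVM_DEBUG(dbgs() << Plan);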

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H