1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
29#include "llvm/ADT/SmallPtrSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
34#include "llvm/Analysis/IVDescriptors.h"
35#include "llvm/Analysis/MemoryLocation.h"
36#include "llvm/Analysis/VectorUtils.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/Support/Compiler.h"
41#include "llvm/Support/InstructionCost.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
53class InnerLoopVectorizer;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
57class RecurrenceDescriptor;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
66class VPReplicateRecipe;
67class VPlanSlp;
68class Value;
69class LoopVectorizationCostModel;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81class LLVM_ABI_FOR_TEST VPBlockBase {
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
94 SmallVector<VPBlockBase *, 1> Predecessors;
95
96 /// List of successor blocks.
97 SmallVector<VPBlockBase *, 1> Successors;
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Elt: Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Elt: Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Range&: Predecessors, Val: Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(CI: Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Range&: Successors, Val: Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(CI: Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Range&: Predecessors, Val: Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Range&: Successors, Val: Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
160 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
201 iterator_range<VPBlockBase **> successors() { return Successors; }
202 iterator_range<VPBlockBase **> predecessors() { return Predecessors; }
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
209 VPBlockBase *getSingleSuccessor() const {
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
215 VPBlockBase *getSinglePredecessor() const {
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
242 const VPBlocksTy &getHierarchicalSuccessors() {
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
248 VPBlockBase *getSingleHierarchicalSuccessor() {
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
258 const VPBlocksTy &getHierarchicalPredecessors() {
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
264 VPBlockBase *getSingleHierarchicalPredecessor() {
265 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
266 }
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
271 void setOneSuccessor(VPBlockBase *Successor) {
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(Successor: IfTrue);
285 appendSuccessor(Successor: IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
291 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Predecessor: Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Successor: Succ);
304 }
305
306 /// Remove all the predecessor of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
314 void swapPredecessors() {
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(a&: Predecessors[0], b&: Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
322 void swapSuccessors() {
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(a&: Successors[0], b&: Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(first: Predecessors.begin(), last: find(Range: Predecessors, Val: Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(first: Successors.begin(), last: find(Range: Successors, Val: Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
346 virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consequtive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPlan to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and it's recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDef instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class LLVM_ABI_FOR_TEST VPRecipeBase
    : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
      public VPDef,
      public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Subclass identifier (for isa/dyn_cast).
  const unsigned char SubclassID;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
  /// that is actually instantiated. Values of this enumeration are kept in the
  /// SubclassID field of the VPRecipeBase objects. They are used for concrete
  /// type identification.
  using VPRecipeTy = enum {
    VPBranchOnMaskSC,
    VPDerivedIVSC,
    VPExpandSCEVSC,
    VPExpressionSC,
    VPIRInstructionSC,
    VPInstructionSC,
    VPInterleaveEVLSC,
    VPInterleaveSC,
    VPReductionEVLSC,
    VPReductionSC,
    VPReplicateSC,
    VPScalarIVStepsSC,
    VPVectorPointerSC,
    VPVectorEndPointerSC,
    VPWidenCallSC,
    VPWidenCanonicalIVSC,
    VPWidenCastSC,
    VPWidenGEPSC,
    VPWidenIntrinsicSC,
    VPWidenLoadEVLSC,
    VPWidenLoadSC,
    VPWidenStoreEVLSC,
    VPWidenStoreSC,
    VPWidenSC,
    VPBlendSC,
    VPHistogramSC,
    // START: Phi-like recipes. Need to be kept together.
    VPWidenPHISC,
    VPPredInstPHISC,
    // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
    // VPHeaderPHIRecipe need to be kept together.
    VPCanonicalIVPHISC,
    VPCurrentIterationPHISC,
    VPActiveLaneMaskPHISC,
    VPFirstOrderRecurrencePHISC,
    VPWidenIntOrFpInductionSC,
    VPWidenPointerInductionSC,
    VPReductionPHISC,
    // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
    // END: Phi-like recipes
    // Aliases marking the [first, last] ranges of the groupings above; used
    // for range-based classof checks (e.g. isPhi).
    VPFirstPHISC = VPWidenPHISC,
    VPFirstHeaderPHISC = VPCanonicalIVPHISC,
    VPLastHeaderPHISC = VPReductionPHISC,
    VPLastPHISC = VPReductionPHISC,
  };

  /// Construct a recipe with subclass id \p SC, operands \p Operands and debug
  /// location \p DL.
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = DebugLoc::getUnknown())
      : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}

  ~VPRecipeBase() override = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// \return the VPRegionBlock which the recipe belongs to.
  VPRegionBlock *getRegion();
  const VPRegionBlock *getRegion() const;

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP;
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// \return an ID for the concrete type of this object.
  unsigned getVPRecipeID() const { return SubclassID; }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  // All VPUsers in a VPlan are also VPRecipeBases (mirrors the VPDef overload
  // above).
  static inline bool classof(const VPUser *U) { return true; }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const;

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

  /// Return true if the recipe is a scalar cast.
  bool isScalarCast() const;

  /// Set the recipe's debug location to \p NewDL.
  void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Dump the recipe to stderr (for debugging).
  LLVM_ABI_FOR_TEST void dump() const;

  /// Print the recipe, delegating to printRecipe().
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const;
#endif

protected:
  /// Compute the cost of this recipe either using a recipe's specialized
  /// implementation or using the legacy cost model and the underlying
  /// instructions.
  virtual InstructionCost computeCost(ElementCount VF,
                                      VPCostContext &Ctx) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Each concrete VPRecipe prints itself, without printing common information,
  /// like debug info or metadata.
  virtual void printRecipe(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const = 0;
#endif
};
567
// Helper macro to define common classof implementations for recipes. Given a
// concrete recipe's \p VPRecipeID, it emits the classof overloads needed for
// isa/dyn_cast from VPRecipeBase, from a VPValue (via its defining recipe),
// from a VPUser and from VPSingleDefRecipe.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPRecipeID() == VPRecipeID;                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
584
585/// VPSingleDef is a base class for recipes for modeling a sequence of one or
586/// more output IR that define a single result VPValue.
587/// Note that VPRecipeBase must be inherited from before VPValue.
588class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue {
589public:
590 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
591 DebugLoc DL = DebugLoc::getUnknown())
592 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
593
594 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
595 Value *UV, DebugLoc DL = DebugLoc::getUnknown())
596 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
597
598 static inline bool classof(const VPRecipeBase *R) {
599 switch (R->getVPRecipeID()) {
600 case VPRecipeBase::VPDerivedIVSC:
601 case VPRecipeBase::VPExpandSCEVSC:
602 case VPRecipeBase::VPExpressionSC:
603 case VPRecipeBase::VPInstructionSC:
604 case VPRecipeBase::VPReductionEVLSC:
605 case VPRecipeBase::VPReductionSC:
606 case VPRecipeBase::VPReplicateSC:
607 case VPRecipeBase::VPScalarIVStepsSC:
608 case VPRecipeBase::VPVectorPointerSC:
609 case VPRecipeBase::VPVectorEndPointerSC:
610 case VPRecipeBase::VPWidenCallSC:
611 case VPRecipeBase::VPWidenCanonicalIVSC:
612 case VPRecipeBase::VPWidenCastSC:
613 case VPRecipeBase::VPWidenGEPSC:
614 case VPRecipeBase::VPWidenIntrinsicSC:
615 case VPRecipeBase::VPWidenSC:
616 case VPRecipeBase::VPBlendSC:
617 case VPRecipeBase::VPPredInstPHISC:
618 case VPRecipeBase::VPCanonicalIVPHISC:
619 case VPRecipeBase::VPCurrentIterationPHISC:
620 case VPRecipeBase::VPActiveLaneMaskPHISC:
621 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
622 case VPRecipeBase::VPWidenPHISC:
623 case VPRecipeBase::VPWidenIntOrFpInductionSC:
624 case VPRecipeBase::VPWidenPointerInductionSC:
625 case VPRecipeBase::VPReductionPHISC:
626 return true;
627 case VPRecipeBase::VPBranchOnMaskSC:
628 case VPRecipeBase::VPInterleaveEVLSC:
629 case VPRecipeBase::VPInterleaveSC:
630 case VPRecipeBase::VPIRInstructionSC:
631 case VPRecipeBase::VPWidenLoadEVLSC:
632 case VPRecipeBase::VPWidenLoadSC:
633 case VPRecipeBase::VPWidenStoreEVLSC:
634 case VPRecipeBase::VPWidenStoreSC:
635 case VPRecipeBase::VPHistogramSC:
636 // TODO: Widened stores don't define a value, but widened loads do. Split
637 // the recipes to be able to make widened loads VPSingleDefRecipes.
638 return false;
639 }
640 llvm_unreachable("Unhandled VPRecipeID");
641 }
642
643 static inline bool classof(const VPUser *U) {
644 auto *R = dyn_cast<VPRecipeBase>(Val: U);
645 return R && classof(R);
646 }
647
648 VPSingleDefRecipe *clone() override = 0;
649
650 /// Returns the underlying instruction.
651 Instruction *getUnderlyingInstr() {
652 return cast<Instruction>(Val: getUnderlyingValue());
653 }
654 const Instruction *getUnderlyingInstr() const {
655 return cast<Instruction>(Val: getUnderlyingValue());
656 }
657
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
660 LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const;
661#endif
662};
663
664/// Class to record and manage LLVM IR flags.
665class VPIRFlags {
  /// Discriminator for which member of the flags union below is active.
  enum class OperationType : unsigned char {
    Cmp,             // Integer/pointer compare: CmpPredicate.
    FCmp,            // FP compare: FCmpFlags (predicate + fast-math flags).
    OverflowingBinOp, // WrapFlags (nuw/nsw).
    Trunc,           // TruncFlags (nuw/nsw).
    DisjointOp,      // DisjointFlags.
    PossiblyExactOp, // ExactFlags.
    GEPOp,           // GEPFlags.
    FPMathOp,        // FMFs.
    NonNegOp,        // NonNegFlags.
    ReductionOp,     // ReductionFlags.
    Other            // No flags; AllFlags is zeroed.
  };

public:
  /// nuw/nsw flags for overflowing binary operations.
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  /// nuw/nsw flags for truncations.
  struct TruncFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  /// disjoint flag for 'or' instructions.
  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

  /// nneg flag for operations that can be marked non-negative.
  struct NonNegFlagsTy {
    char NonNeg : 1;
    NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
  };

private:
  /// exact flag for division/shift operations.
  struct ExactFlagsTy {
    char IsExact : 1;
    ExactFlagsTy(bool Exact) : IsExact(Exact) {}
  };
  /// Bit-packed mirror of llvm::FastMathFlags.
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
  };
  /// Holds both the predicate and fast-math flags for floating-point
  /// comparisons.
  struct FCmpFlagsTy {
    CmpInst::Predicate Pred;
    FastMathFlagsTy FMFs;
  };
  /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
  struct ReductionFlagsTy {
    // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
    // additional kinds.
    unsigned char Kind : 6;
    // TODO: Derive order/in-loop from plan and remove here.
    unsigned char IsOrdered : 1;
    unsigned char IsInLoop : 1;
    FastMathFlagsTy FMFs;

    ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
                     FastMathFlags FMFs)
        : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
          IsInLoop(IsInLoop), FMFs(FMFs) {}
  };

  /// Tag selecting the active union member below.
  OperationType OpType;

  // Tagged union of the per-operation flag structs; OpType says which member
  // is valid. AllFlags provides raw access for bulk copy/zeroing.
  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    TruncFlagsTy TruncFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPNoWrapFlags GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    FCmpFlagsTy FCmpFlags;
    ReductionFlagsTy ReductionFlags;
    unsigned AllFlags;
  };
758
public:
  /// Create flags of kind Other, with no flag bits set.
  VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
761
762 VPIRFlags(Instruction &I) {
763 if (auto *FCmp = dyn_cast<FCmpInst>(Val: &I)) {
764 OpType = OperationType::FCmp;
765 FCmpFlags.Pred = FCmp->getPredicate();
766 FCmpFlags.FMFs = FCmp->getFastMathFlags();
767 } else if (auto *Op = dyn_cast<CmpInst>(Val: &I)) {
768 OpType = OperationType::Cmp;
769 CmpPredicate = Op->getPredicate();
770 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(Val: &I)) {
771 OpType = OperationType::DisjointOp;
772 DisjointFlags.IsDisjoint = Op->isDisjoint();
773 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(Val: &I)) {
774 OpType = OperationType::OverflowingBinOp;
775 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
776 } else if (auto *Op = dyn_cast<TruncInst>(Val: &I)) {
777 OpType = OperationType::Trunc;
778 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
779 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(Val: &I)) {
780 OpType = OperationType::PossiblyExactOp;
781 ExactFlags.IsExact = Op->isExact();
782 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
783 OpType = OperationType::GEPOp;
784 GEPFlags = GEP->getNoWrapFlags();
785 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) {
786 OpType = OperationType::NonNegOp;
787 NonNegFlags.NonNeg = PNNI->hasNonNeg();
788 } else if (auto *Op = dyn_cast<FPMathOperator>(Val: &I)) {
789 OpType = OperationType::FPMathOp;
790 FMFs = Op->getFastMathFlags();
791 } else {
792 OpType = OperationType::Other;
793 AllFlags = 0;
794 }
795 }
796
  /// Create flags for an integer compare with predicate \p Pred.
  VPIRFlags(CmpInst::Predicate Pred)
      : OpType(OperationType::Cmp), CmpPredicate(Pred) {}

  /// Create flags for a floating-point compare with predicate \p Pred and
  /// fast-math flags \p FMFs.
  VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
      : OpType(OperationType::FCmp) {
    FCmpFlags.Pred = Pred;
    FCmpFlags.FMFs = FMFs;
  }

  /// Create flags for an overflowing binary operation (nuw/nsw).
  VPIRFlags(WrapFlagsTy WrapFlags)
      : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  /// Create flags for a truncation (nuw/nsw).
  VPIRFlags(TruncFlagsTy TruncFlags)
      : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}

  /// Create flags for a floating-point operation with \p FMFs.
  VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}

  /// Create flags for a disjoint operation.
  VPIRFlags(DisjointFlagsTy DisjointFlags)
      : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}

  /// Create flags for a non-negative operation.
  VPIRFlags(NonNegFlagsTy NonNegFlags)
      : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}

  /// Create flags for a possibly-exact operation.
  VPIRFlags(ExactFlagsTy ExactFlags)
      : OpType(OperationType::PossiblyExactOp), ExactFlags(ExactFlags) {}

  /// Create flags for a GEP.
  VPIRFlags(GEPNoWrapFlags GEPFlags)
      : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}

  /// Create flags for a reduction with recurrence kind \p Kind.
  VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
      : OpType(OperationType::ReductionOp),
        ReductionFlags(Kind, IsOrdered, IsInLoop, FMFs) {}
829
  /// Copy the operation type and flag bits from \p Other into this object.
  /// NOTE(review): the flag bits are copied through the raw AllFlags union
  /// member (an unsigned); this assumes all flag state fits within that
  /// member. Larger union members (e.g. FCmpFlagsTy, ReductionFlagsTy) may
  /// exceed sizeof(unsigned) -- TODO confirm they are copied in full.
  void transferFlags(VPIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

  /// Only keep flags also present in \p Other. \p Other must have the same
  /// OpType as the current object.
  void intersectFlags(const VPIRFlags &Other);
838
  /// Drop all poison-generating flags, leaving any non-poison-generating flags
  /// (e.g. most fast-math flags) intact.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::Trunc:
      TruncFlags.HasNUW = false;
      TruncFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags = GEPNoWrapFlags::none();
      break;
    case OperationType::FPMathOp:
    case OperationType::FCmp:
    case OperationType::ReductionOp:
      // Only nnan and ninf are dropped, mirroring
      // Instruction::dropPoisonGeneratingFlags; the remaining fast-math flags
      // are kept.
      getFMFsRef().NoNaNs = false;
      getFMFsRef().NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      // No poison-generating flags to drop.
      break;
    }
  }
875
  /// Apply the IR flags to \p I. Tracked flags overwrite \p I's current
  /// values; flag categories not tracked by this object are left untouched.
  /// Must not be called for ReductionOp.
  void applyFlags(Instruction &I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I.setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::Trunc:
      I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
      I.setHasNoSignedWrap(TruncFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(Val: &I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I.setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      cast<GetElementPtrInst>(Val: &I)->setNoWrapFlags(GEPFlags);
      break;
    case OperationType::FPMathOp:
    case OperationType::FCmp: {
      // Apply each fast-math flag individually; FCmp predicates are handled
      // separately via getPredicate/setPredicate.
      const FastMathFlagsTy &F = getFMFsRef();
      I.setHasAllowReassoc(F.AllowReassoc);
      I.setHasNoNaNs(F.NoNaNs);
      I.setHasNoInfs(F.NoInfs);
      I.setHasNoSignedZeros(F.NoSignedZeros);
      I.setHasAllowReciprocal(F.AllowReciprocal);
      I.setHasAllowContract(F.AllowContract);
      I.setHasApproxFunc(F.ApproxFunc);
      break;
    }
    case OperationType::NonNegOp:
      I.setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::ReductionOp:
      llvm_unreachable("reduction ops should not use applyFlags");
    case OperationType::Cmp:
    case OperationType::Other:
      // Nothing to transfer.
      break;
    }
  }
918
919 CmpInst::Predicate getPredicate() const {
920 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
921 "recipe doesn't have a compare predicate");
922 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
923 }
924
925 void setPredicate(CmpInst::Predicate Pred) {
926 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
927 "recipe doesn't have a compare predicate");
928 if (OpType == OperationType::FCmp)
929 FCmpFlags.Pred = Pred;
930 else
931 CmpPredicate = Pred;
932 }
933
  /// Returns the no-wrap flags for a GEP operation.
  /// NOTE(review): unlike the other accessors, this does not assert
  /// OpType == GEPOp before reading the union member — confirm callers only
  /// use it on GEP recipes.
  GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }

  /// Returns true if the recipe has a comparison predicate.
  bool hasPredicate() const {
    return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const {
    return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
           OpType == OperationType::ReductionOp;
  }

  /// Returns the fast-math flags of the recipe. Only valid when
  /// hasFastMathFlags() is true.
  LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const;

  /// Returns true if the recipe has non-negative flag.
  bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }

  /// Returns the non-negative (nneg) flag. Only valid when hasNonNegFlag()
  /// is true.
  bool isNonNeg() const {
    assert(OpType == OperationType::NonNegOp &&
           "recipe doesn't have a NNEG flag");
    return NonNegFlags.NonNeg;
  }
957
958 bool hasNoUnsignedWrap() const {
959 switch (OpType) {
960 case OperationType::OverflowingBinOp:
961 return WrapFlags.HasNUW;
962 case OperationType::Trunc:
963 return TruncFlags.HasNUW;
964 default:
965 llvm_unreachable("recipe doesn't have a NUW flag");
966 }
967 }
968
969 bool hasNoSignedWrap() const {
970 switch (OpType) {
971 case OperationType::OverflowingBinOp:
972 return WrapFlags.HasNSW;
973 case OperationType::Trunc:
974 return TruncFlags.HasNSW;
975 default:
976 llvm_unreachable("recipe doesn't have a NSW flag");
977 }
978 }
979
980 bool isDisjoint() const {
981 assert(OpType == OperationType::DisjointOp &&
982 "recipe cannot have a disjoing flag");
983 return DisjointFlags.IsDisjoint;
984 }
985
  /// Returns the recurrence kind of the modeled reduction. Only valid when
  /// the operation type is ReductionOp.
  RecurKind getRecurKind() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return static_cast<RecurKind>(ReductionFlags.Kind);
  }

  /// Returns true if the modeled reduction is ordered. Only valid when the
  /// operation type is ReductionOp.
  bool isReductionOrdered() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsOrdered;
  }

  /// Returns true if the modeled reduction is performed in-loop. Only valid
  /// when the operation type is ReductionOp.
  bool isReductionInLoop() const {
    assert(OpType == OperationType::ReductionOp &&
           "recipe doesn't have reduction flags");
    return ReductionFlags.IsInLoop;
  }
1003
1004private:
  /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
  FastMathFlagsTy &getFMFsRef() {
    if (OpType == OperationType::FCmp)
      return FCmpFlags.FMFs;
    if (OpType == OperationType::ReductionOp)
      return ReductionFlags.FMFs;
    // FPMathOp stores its fast-math flags directly in the FMFs union member.
    return FMFs;
  }
  /// Const overload of getFMFsRef().
  const FastMathFlagsTy &getFMFsRef() const {
    if (OpType == OperationType::FCmp)
      return FCmpFlags.FMFs;
    if (OpType == OperationType::ReductionOp)
      return ReductionFlags.FMFs;
    return FMFs;
  }
1020
public:
  /// Returns default flags for \p Opcode for opcodes that support it, asserts
  /// otherwise. Opcodes not supporting default flags include compares and
  /// ComputeReductionResult.
  static VPIRFlags getDefaultFlags(unsigned Opcode);

#if !defined(NDEBUG)
  /// Returns true if the set flags are valid for \p Opcode.
  LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;

  /// Returns true if \p Opcode has its required flags set.
  LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the set flags to \p O.
  void printFlags(raw_ostream &O) const;
#endif
1038};
1039
/// A pure-virtual common base class for recipes defining a single VPValue and
/// using IR flags.
struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
  VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
                      const VPIRFlags &Flags,
                      DebugLoc DL = DebugLoc::getUnknown())
      : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}

  /// Support isa/dyn_cast: enumerates the concrete recipe kinds that inherit
  /// from VPRecipeWithIRFlags.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
           R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
           R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
           R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
           R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
  }

  /// Support isa/dyn_cast from VPUser, forwarding to the recipe overload.
  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(Val: U);
    return R && classof(R);
  }

  /// Support isa/dyn_cast from VPValue via its defining recipe.
  static inline bool classof(const VPValue *V) {
    auto *R = V->getDefiningRecipe();
    return R && classof(R);
  }

  /// Pure virtual; concrete recipes must provide cloning.
  VPRecipeWithIRFlags *clone() override = 0;

  static inline bool classof(const VPSingleDefRecipe *R) {
    return classof(R: static_cast<const VPRecipeBase *>(R));
  }

  /// Pure virtual; concrete recipes must implement code generation.
  void execute(VPTransformState &State) override = 0;

  /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
  InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
                                             VPCostContext &Ctx) const;
};
1085
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling.
/// \tparam PartOpIdx operand index at which the unroll part is expected.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};
1097
/// Helper to manage IR metadata for recipes. It filters out metadata that
/// cannot be propagated.
class VPIRMetadata {
  /// (Kind, Node) pairs for all tracked metadata entries.
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;

public:
  VPIRMetadata() = default;

  /// Adds metadata that can be preserved from the original instruction
  /// \p I.
  VPIRMetadata(Instruction &I) { getMetadataToPropagate(Inst: &I, Metadata); }

  /// Copy constructor for cloning.
  VPIRMetadata(const VPIRMetadata &Other) = default;

  VPIRMetadata &operator=(const VPIRMetadata &Other) = default;

  /// Add all metadata to \p I.
  void applyMetadata(Instruction &I) const;

  /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
  /// already exists, it will be replaced. Otherwise, it will be added.
  void setMetadata(unsigned Kind, MDNode *Node) {
    auto It =
        llvm::find_if(Range&: Metadata, P: [Kind](const std::pair<unsigned, MDNode *> &P) {
          return P.first == Kind;
        });
    if (It != Metadata.end())
      It->second = Node;
    else
      Metadata.emplace_back(Args&: Kind, Args&: Node);
  }

  /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
  /// nodes that are common to both.
  void intersect(const VPIRMetadata &MD);

  /// Get metadata of kind \p Kind. Returns nullptr if not found.
  MDNode *getMetadata(unsigned Kind) const {
    auto It =
        find_if(Range: Metadata, P: [Kind](const auto &P) { return P.first == Kind; });
    return It != Metadata.end() ? It->second : nullptr;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print metadata with node IDs.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
1147
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex. Most VPInstruction
/// opcodes can take an optional mask. Masks may be assigned during
/// predication.
class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
                                        public VPIRMetadata {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    // Creates a mask where each lane is active (true) whilst the current
    // counter (first operand + index) is less than the second operand. i.e.
    // mask[i] = icmp ult (op0 + i), op1
    // The size of the mask returned is VF * Multiplier (UF, third op).
    ActiveLaneMask,
    ExplicitVectorLength,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    // Abstract instruction that compares two values and branches. This is
    // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
    BranchOnCount,
    BranchOnCond,
    // Branch with 2 boolean condition operands and 3 successors. If condition
    // 0 is true, branches to successor 0; if condition 1 is true, branches to
    // successor 1; otherwise branches to successor 2. Expanded after region
    // dissolution into: (1) an OR of the two conditions branching to
    // middle.split or successor 2, and (2) middle.split branching to successor
    // 0 or successor 1 based on condition 0.
    BranchOnTwoConds,
    Broadcast,
    /// Given operands of (the same) struct type, creates a struct of fixed-
    /// width vectors each containing a struct field of all operands. The
    /// number of operands matches the element count of every vector.
    BuildStructVector,
    /// Creates a fixed-width vector containing all operands. The number of
    /// operands matches the vector element count.
    BuildVector,
    /// Extracts all lanes from its (non-scalable) vector operand. This is an
    /// abstract VPInstruction whose single defined VPValue represents VF
    /// scalars extracted from a vector, to be replaced by VF ExtractElement
    /// VPInstructions.
    Unpack,
    /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
    /// where one of (x,y) is loop invariant, and both x and y are integer type.
    ComputeAnyOfResult,
    ComputeReductionResult,
    // Extracts the last part of its operand. Removed during unrolling.
    ExtractLastPart,
    // Extracts the last lane of its vector operand, per part.
    ExtractLastLane,
    // Extracts the second-to-last lane from its operand or the second-to-last
    // part if it is scalar. In the latter case, the recipe will be removed
    // during unrolling.
    ExtractPenultimateElement,
    LogicalAnd, // Non-poison propagating logical And.
    LogicalOr, // Non-poison propagating logical Or.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only or
    // for all lanes, depending on its uses).
    PtrAdd,
    // Add a vector offset in bytes (second operand) to a scalar base pointer
    // (first operand).
    WidePtrAdd,
    // Returns a scalar boolean value, which is true if any lane of its
    // (boolean) vector operands is true. It produces the reduced value across
    // all unrolled iterations. Unrolling will add all copies of its original
    // operand as additional operands. AnyOf is poison-safe as all operands
    // will be frozen.
    AnyOf,
    // Calculates the first active lane index of the vector predicate operands.
    // It produces the lane index across all unrolled iterations. Unrolling will
    // add all copies of its original operand as additional operands.
    // Implemented with @llvm.experimental.cttz.elts, but returns the expected
    // result even with operands that are all zeroes.
    FirstActiveLane,
    // Calculates the last active lane index of the vector predicate operands.
    // The predicates must be prefix-masks (all 1s before all 0s). Used when
    // tail-folding to extract the correct live-out value from the last active
    // iteration. It produces the lane index across all unrolled iterations.
    // Unrolling will add all copies of its original operand as additional
    // operands.
    LastActiveLane,
    // Returns a reversed vector for the operand.
    Reverse,

    // The opcodes below are used for VPInstructionWithType.
    //
    /// Scale the first operand (vector step) by the second operand
    /// (scalar-step). Casts both operands to the result type if needed.
    WideIVStep,
    /// Start vector for reductions with 3 operands: the original start value,
    /// the identity value for the reduction and an integer indicating the
    /// scaling factor.
    ReductionStartVector,
    // Creates a step vector starting from 0 to VF with a step of 1.
    StepVector,
    /// Extracts a single lane (first operand) from a set of vector operands.
    /// The lane specifies an index into a vector formed by combining all vector
    /// operands (all operands after the first one).
    ExtractLane,
    /// Explicit user for the resume phi of the canonical induction in the main
    /// VPlan, used by the epilogue vector loop.
    ResumeForEpilogue,
    /// Extracts the lane from the first operand corresponding to the last
    /// active (non-zero) lane in the mask (second operand), or if no lanes
    /// were active in the mask, returns the default value (third operand).
    ExtractLastActive,

    /// Returns the value for vscale.
    VScale,
    /// Compute the exiting value of a wide induction after vectorization, that
    /// is the value of the last lane of the induction increment (i.e. its
    /// backedge value). Takes the wide induction recipe and the original
    /// backedge value as operands.
    ExitingIVValue,
    OpsEnd = ExitingIVValue,
  };

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Return the number of operands determined by the opcode of the
  /// VPInstruction, excluding mask. Returns -1u if the number of operands
  /// cannot be determined directly by the opcode.
  unsigned getNumOperandsForOpcode() const;

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  std::string Name;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
  Value *generate(VPTransformState &State);

  /// Returns true if the VPInstruction does not need masking.
  bool alwaysUnmasked() const {
    // For now only VPInstructions with underlying values use masks.
    // TODO: provide masks to VPInstructions w/o underlying values.
    if (!getUnderlyingValue())
      return true;

    return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
  }

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
                DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");

  VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)

  /// Clone this VPInstruction, copying opcode, operands, flags, metadata,
  /// debug location and name; the underlying value is preserved if set.
  VPInstruction *clone() override {
    auto *New = new VPInstruction(Opcode, operands(), *this, *this,
                                  getDebugLoc(), Name);
    if (getUnderlyingValue())
      New->setUnderlyingValue(getUnderlyingInstr());
    return New;
  }

  /// Returns the opcode of the VPInstruction.
  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnTwoConds:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the VPInstruction has a mask operand.
  bool isMasked() const {
    unsigned NumOpsForOpcode = getNumOperandsForOpcode();
    // VPInstructions without a fixed number of operands cannot be masked.
    if (NumOpsForOpcode == -1u)
      return false;
    // A mask manifests as exactly one extra operand beyond the fixed count.
    return NumOpsForOpcode + 1 == getNumOperands();
  }

  /// Returns the number of operands, excluding the mask if the VPInstruction is
  /// masked.
  unsigned getNumOperandsWithoutMask() const {
    return getNumOperands() - isMasked();
  }

  /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
  void addMask(VPValue *Mask) {
    assert(!isMasked() && "recipe is already masked");
    if (alwaysUnmasked())
      return;
    addOperand(Operand: Mask);
  }

  /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
  /// VPInstructions.
  VPValue *getMask() const {
    // By convention the mask is always the last operand.
    return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
  }

  /// Returns an iterator range over the operands excluding the mask operand
  /// if present.
  iterator_range<operand_iterator> operandsWithoutMask() {
    return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
  }
  iterator_range<const_operand_iterator> operandsWithoutMask() const {
    return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
  }

  /// Returns true if the underlying opcode may read from or write to memory.
  bool opcodeMayReadOrWriteFromMemory() const;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;

  /// Returns the symbolic name assigned to the VPInstruction.
  StringRef getName() const { return Name; }

  /// Set the symbolic name for the VPInstruction.
  void setName(StringRef NewName) { Name = NewName.str(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1437
/// A specialization of VPInstruction augmenting it with a dedicated result
/// type, to be used when the opcode and operands of the VPInstruction don't
/// directly determine the result type. Note that there is no separate recipe ID
/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
/// distinguished purely by the opcode.
class VPInstructionWithType : public VPInstruction {
  /// Scalar result type produced by the recipe.
  Type *ResultTy;

public:
  VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
                        Type *ResultTy, const VPIRFlags &Flags = {},
                        const VPIRMetadata &Metadata = {},
                        DebugLoc DL = DebugLoc::getUnknown(),
                        const Twine &Name = "")
      : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
        ResultTy(ResultTy) {}

  static inline bool classof(const VPRecipeBase *R) {
    // VPInstructionWithType are VPInstructions with specific opcodes requiring
    // type information.
    if (R->isScalarCast())
      return true;
    auto *VPI = dyn_cast<VPInstruction>(Val: R);
    if (!VPI)
      return false;
    switch (VPI->getOpcode()) {
    case VPInstruction::WideIVStep:
    case VPInstruction::StepVector:
    case VPInstruction::VScale:
      return true;
    default:
      return false;
    }
  }

  /// Support isa/dyn_cast from VPUser, forwarding to the recipe overload.
  static inline bool classof(const VPUser *R) {
    return isa<VPInstructionWithType>(Val: cast<VPRecipeBase>(Val: R));
  }

  /// Clone the recipe, preserving the result type, flags, metadata and
  /// underlying value.
  VPInstruction *clone() override {
    auto *New =
        new VPInstructionWithType(getOpcode(), operands(), getResultType(),
                                  *this, *this, getDebugLoc(), getName());
    New->setUnderlyingValue(getUnderlyingValue());
    return New;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPInstruction.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns the scalar result type of the recipe.
  Type *getResultType() const { return ResultTy; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1504
/// Helper type to provide functions to access incoming values and blocks for
/// phi-like recipes.
class VPPhiAccessors {
protected:
  /// Return a VPRecipeBase* to the current object.
  virtual const VPRecipeBase *getAsRecipe() const = 0;

public:
  virtual ~VPPhiAccessors() = default;

  /// Returns the incoming VPValue with index \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return getAsRecipe()->getOperand(N: Idx);
  }

  /// Returns the incoming block with index \p Idx.
  const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

  /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
  VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;

  /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
  /// block.
  void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;

  /// Returns the number of incoming values, also number of incoming blocks.
  virtual unsigned getNumIncoming() const {
    return getAsRecipe()->getNumOperands();
  }

  /// Returns an iterator range over the incoming values.
  VPUser::const_operand_range incoming_values() const {
    return make_range(x: getAsRecipe()->op_begin(),
                      y: getAsRecipe()->op_begin() + getNumIncoming());
  }

  using const_incoming_blocks_range = iterator_range<mapped_iterator<
      detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;

  /// Returns an iterator range over the incoming blocks.
  const_incoming_blocks_range incoming_blocks() const {
    std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
      return getIncomingBlock(Idx);
    };
    return map_range(C: index_range(0, getNumIncoming()), F: GetBlock);
  }

  /// Returns an iterator range over pairs of incoming values and corresponding
  /// incoming blocks.
  detail::zippy<llvm::detail::zip_first, VPUser::const_operand_range,
                const_incoming_blocks_range>
  incoming_values_and_blocks() const {
    return zip_equal(t: incoming_values(), u: incoming_blocks());
  }

  /// Removes the incoming value for \p IncomingBlock, which must be a
  /// predecessor.
  void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the phi's incoming operands to \p O.
  void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};
1569
/// A VPInstruction with PHI opcode, providing phi-accessor convenience on top
/// of the plain VPInstruction representation.
struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors {
  VPPhi(ArrayRef<VPValue *> Operands, const VPIRFlags &Flags, DebugLoc DL,
        const Twine &Name = "")
      : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}

  /// Support isa/dyn_cast: a VPPhi is a VPInstruction with PHI opcode.
  static inline bool classof(const VPUser *U) {
    auto *VPI = dyn_cast<VPInstruction>(Val: U);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPValue *V) {
    auto *VPI = dyn_cast<VPInstruction>(Val: V);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  static inline bool classof(const VPSingleDefRecipe *SDR) {
    auto *VPI = dyn_cast<VPInstruction>(Val: SDR);
    return VPI && VPI->getOpcode() == Instruction::PHI;
  }

  /// Clone the phi, preserving flags, debug location, name and underlying
  /// value.
  VPPhi *clone() override {
    auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
    PhiR->setUnderlyingValue(getUnderlyingValue());
    return PhiR;
  }

  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  const VPRecipeBase *getAsRecipe() const override { return this; }
};
1607
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {
  /// The wrapped IR instruction; owned by the original IR, not by the recipe.
  Instruction &I;

protected:
  /// VPIRInstruction::create() should be used to create VPIRInstructions, as
  /// subclasses may need to be created, e.g. VPIRPhi.
  VPIRInstruction(Instruction &I)
      : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}

public:
  ~VPIRInstruction() override = default;

  /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
  /// VPIRInstruction.
  LLVM_ABI_FOR_TEST static VPIRInstruction *create(Instruction &I);

  VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)

  /// Clone the recipe by re-wrapping the same IR instruction and copying all
  /// operands.
  VPIRInstruction *clone() override {
    auto *R = create(I);
    for (auto *Op : operands())
      R->addOperand(Operand: Op);
    return R;
  }

  void execute(VPTransformState &State) override;

  /// Return the cost of this VPIRInstruction.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns the wrapped IR instruction.
  Instruction &getInstruction() const { return I; }

  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1669
/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
/// allowed, and it is used to add a new incoming value for the single
/// predecessor VPBB.
struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
                                   public VPPhiAccessors {
  VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}

  /// Support isa/dyn_cast: a VPIRPhi is a VPIRInstruction wrapping a PHINode.
  static inline bool classof(const VPRecipeBase *U) {
    auto *R = dyn_cast<VPIRInstruction>(Val: U);
    return R && isa<PHINode>(Val: R->getInstruction());
  }

  /// Returns the wrapped IR PHINode.
  PHINode &getIRPhi() { return cast<PHINode>(Val&: getInstruction()); }

  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  const VPRecipeBase *getAsRecipe() const override { return this; }
};
1696
/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
                                        public VPIRMetadata {
  /// Opcode of the instruction to widen (e.g. Instruction::Add).
  unsigned Opcode;

public:
  /// Create a widen recipe for ingredient \p I; the opcode is taken from \p I
  /// and \p I is remembered as the underlying value.
  VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
                DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
    setUnderlyingValue(&I);
  }

  /// Create a widen recipe for \p Opcode without an underlying IR instruction.
  VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
                DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), Opcode(Opcode) {}

  ~VPWidenRecipe() override = default;

  /// Clone the recipe, preserving the underlying value (if any), flags,
  /// metadata and debug location.
  VPWidenRecipe *clone() override {
    if (auto *UV = getUnderlyingValue())
      return new VPWidenRecipe(*cast<Instruction>(Val: UV), operands(), *this, *this,
                               getDebugLoc());
    return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)

  /// Produce a widened instruction using the opcode and operands of the recipe,
  /// processing State.VF elements.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  unsigned getOpcode() const { return Opcode; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  /// This holds only for the condition of a select when it is defined outside
  /// any loop region.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Opcode == Instruction::Select && Op == getOperand(N: 0) &&
           Op->isDefinedOutsideLoopRegions();
  }
};
1756
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  /// Create a recipe widening the cast \p Opcode of \p Op to \p ResultTy.
  /// \p CI, if non-null, is remembered as the underlying IR cast instruction.
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst *CI = nullptr, const VPIRFlags &Flags = {},
                    const VPIRMetadata &Metadata = {},
                    DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
        VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
    // The flags carried by the recipe must be compatible with the cast opcode
    // (e.g. no FP flags on an integer cast).
    assert(flagsValidForOpcode(Opcode) &&
           "Set flags not supported for the provided opcode");
    assert(hasRequiredFlagsForOpcode(Opcode) &&
           "Opcode requires specific flags to be set");
    setUnderlyingValue(CI);
  }

  ~VPWidenCastRecipe() override = default;

  VPWidenCastRecipe *clone() override {
    return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy,
                                 cast_or_null<CastInst>(Val: getUnderlyingValue()),
                                 *this, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)

  /// Produce widened copies of the cast.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCastRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
1808
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
  /// ID of the vector intrinsic to widen.
  Intrinsic::ID VectorIntrinsicID;

  /// Scalar return type of the intrinsic.
  Type *ResultTy;

  /// True if the intrinsic may read from memory.
  bool MayReadFromMemory;

  /// True if the intrinsic may write to memory.
  bool MayWriteToMemory;

  /// True if the intrinsic may have side-effects.
  bool MayHaveSideEffects;

public:
  /// Create a recipe widening \p CI as intrinsic \p VectorIntrinsicID; the
  /// memory-behavior flags are taken directly from the underlying call.
  VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         const VPIRFlags &Flags = {},
                         const VPIRMetadata &MD = {},
                         DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
                            Flags, DL),
        VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
        MayReadFromMemory(CI.mayReadFromMemory()),
        MayWriteToMemory(CI.mayWriteToMemory()),
        MayHaveSideEffects(CI.mayHaveSideEffects()) {
    setUnderlyingValue(&CI);
  }

  /// Create a recipe for \p VectorIntrinsicID without an underlying call; the
  /// memory-behavior flags are derived conservatively from the intrinsic's
  /// function attributes.
  VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                         ArrayRef<VPValue *> CallArguments, Type *Ty,
                         const VPIRFlags &Flags = {},
                         const VPIRMetadata &Metadata = {},
                         DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
                            Flags, DL),
        VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
        ResultTy(Ty) {
    LLVMContext &Ctx = Ty->getContext();
    AttributeSet Attrs = Intrinsic::getFnAttributes(C&: Ctx, id: VectorIntrinsicID);
    MemoryEffects ME = Attrs.getMemoryEffects();
    MayReadFromMemory = !ME.onlyWritesMemory();
    MayWriteToMemory = !ME.onlyReadsMemory();
    // Writing to memory, possibly unwinding or possibly not returning all
    // count as side-effects.
    MayHaveSideEffects = MayWriteToMemory ||
                         !Attrs.hasAttribute(Kind: Attribute::NoUnwind) ||
                         !Attrs.hasAttribute(Kind: Attribute::WillReturn);
  }

  ~VPWidenIntrinsicRecipe() override = default;

  VPWidenIntrinsicRecipe *clone() override {
    if (Value *CI = getUnderlyingValue())
      return new VPWidenIntrinsicRecipe(*cast<CallInst>(Val: CI), VectorIntrinsicID,
                                        operands(), ResultTy, *this, *this,
                                        getDebugLoc());
    return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
                                      *this, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)

  /// Produce a widened version of the vector intrinsic.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this vector intrinsic.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Return the ID of the intrinsic.
  Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }

  /// Return the scalar return type of the intrinsic.
  Type *getResultType() const { return ResultTy; }

  /// Returns the name of the intrinsic as a string.
  StringRef getIntrinsicName() const;

  /// Returns true if the intrinsic may read from memory.
  bool mayReadFromMemory() const { return MayReadFromMemory; }

  /// Returns true if the intrinsic may write to memory.
  bool mayWriteToMemory() const { return MayWriteToMemory; }

  /// Returns true if the intrinsic may have side-effects.
  bool mayHaveSideEffects() const { return MayHaveSideEffects; }

  LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
1907
/// A recipe for widening Call instructions using library calls.
class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
                                            public VPIRMetadata {
  /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
  /// between a given VF and the chosen vectorized variant, so there will be a
  /// different VPlan for each VF with a valid variant.
  Function *Variant;

public:
  /// Create a recipe calling the vectorized variant \p Variant. The last
  /// element of \p CallArguments must be the scalar called function.
  VPWidenCallRecipe(Value *UV, Function *Variant,
                    ArrayRef<VPValue *> CallArguments,
                    const VPIRFlags &Flags = {},
                    const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
                            DL),
        VPIRMetadata(Metadata), Variant(Variant) {
    setUnderlyingValue(UV);
    assert(
        isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
        "last operand must be the called function");
  }

  ~VPWidenCallRecipe() override = default;

  VPWidenCallRecipe *clone() override {
    return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
                                 *this, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)

  /// Produce a widened version of the call instruction.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCallRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Returns the called scalar function, stored as the last operand.
  Function *getCalledScalarFunction() const {
    return cast<Function>(Val: getOperand(N: getNumOperands() - 1)->getLiveInIRValue());
  }

  /// Returns the call arguments, i.e. all operands except the trailing
  /// called-function operand.
  operand_range args() { return drop_end(RangeOrContainer: operands()); }
  const_operand_range args() const { return drop_end(RangeOrContainer: operands()); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
1960
1961/// A recipe representing a sequence of load -> update -> store as part of
1962/// a histogram operation. This means there may be aliasing between vector
1963/// lanes, which is handled by the llvm.experimental.vector.histogram family
1964/// of intrinsics. The only update operations currently supported are
1965/// 'add' and 'sub' where the other term is loop-invariant.
1966class VPHistogramRecipe : public VPRecipeBase {
1967 /// Opcode of the update operation, currently either add or sub.
1968 unsigned Opcode;
1969
1970public:
1971 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1972 DebugLoc DL = DebugLoc::getUnknown())
1973 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
1974 Opcode(Opcode) {}
1975
1976 ~VPHistogramRecipe() override = default;
1977
1978 VPHistogramRecipe *clone() override {
1979 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1980 }
1981
1982 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
1983
1984 /// Produce a vectorized histogram operation.
1985 void execute(VPTransformState &State) override;
1986
1987 /// Return the cost of this VPHistogramRecipe.
1988 InstructionCost computeCost(ElementCount VF,
1989 VPCostContext &Ctx) const override;
1990
1991 unsigned getOpcode() const { return Opcode; }
1992
1993 /// Return the mask operand if one was provided, or a null pointer if all
1994 /// lanes should be executed unconditionally.
1995 VPValue *getMask() const {
1996 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
1997 }
1998
1999protected:
2000#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2001 /// Print the recipe
2002 void printRecipe(raw_ostream &O, const Twine &Indent,
2003 VPSlotTracker &SlotTracker) const override;
2004#endif
2005};
2006
/// A recipe for handling GEP instructions.
class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  /// Source element type of the GEP.
  Type *SourceElementTy;

  /// Returns true if the GEP's base pointer is defined outside all loop
  /// regions.
  bool isPointerLoopInvariant() const {
    return getOperand(N: 0)->isDefinedOutsideLoopRegions();
  }

  /// Returns true if the \p I-th index operand is defined outside all loop
  /// regions (operand 0 is the base pointer, indices start at operand 1).
  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(N: I + 1)->isDefinedOutsideLoopRegions();
  }

public:
  VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands,
                   const VPIRFlags &Flags = {},
                   DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
        SourceElementTy(GEP->getSourceElementType()) {
    setUnderlyingValue(GEP);
    // GEPs are expected to carry no metadata that needs propagating; the
    // (void) silences the unused-variable warning in NDEBUG builds.
    SmallVector<std::pair<unsigned, MDNode *>> Metadata;
    (void)Metadata;
    getMetadataToPropagate(Inst: GEP, Metadata);
    assert(Metadata.empty() && "unexpected metadata on GEP");
  }

  ~VPWidenGEPRecipe() override = default;

  VPWidenGEPRecipe *clone() override {
    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()),
                                operands(), *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)

  /// This recipe generates a GEP instruction.
  unsigned getOpcode() const { return Instruction::GetElementPtr; }

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

  Type *getSourceElementType() const { return SourceElementTy; }

  /// Return the cost of this VPWidenGEPRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
2066
/// A recipe to compute a pointer to the last element of each part of a widened
/// memory access for widened memory accesses of SourceElementTy. Used for
/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
/// unroller otherwise.
class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags {
  /// Element type of the memory access this pointer is computed for.
  Type *SourceElementTy;

  /// The constant stride of the pointer computed by this recipe, expressed in
  /// units of SourceElementTy.
  int64_t Stride;

public:
  VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
                           int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
      : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
                            GEPFlags, DL),
        SourceElementTy(SourceElementTy), Stride(Stride) {
    // Only reversed accesses use this recipe, hence the negative stride.
    assert(Stride < 0 && "Stride must be negative");
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)

  Type *getSourceElementType() const { return SourceElementTy; }
  int64_t getStride() const { return Stride; }
  VPValue *getPointer() const { return getOperand(N: 0); }
  VPValue *getVFValue() const { return getOperand(N: 1); }
  /// Returns the offset operand if it has been materialized, null otherwise.
  VPValue *getOffset() const {
    return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
  }

  /// Adds the offset operand to the recipe.
  /// Offset = Stride * (VF - 1) + Part * Stride * VF.
  void materializeOffset(unsigned Part = 0);

  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPVectorEndPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Once the offset operand has been added (3 operands), the answer would
    // depend on the part, hence the restriction to at most two operands.
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPVectorEndPointerRecipe *clone() override {
    auto *VEPR = new VPVectorEndPointerRecipe(
        getPointer(), getVFValue(), getSourceElementType(), getStride(),
        getGEPNoWrapFlags(), getDebugLoc());
    if (auto *Offset = getOffset())
      VEPR->addOperand(Operand: Offset);
    return VEPR;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
2141
/// A recipe to compute the pointers for widened memory accesses of \p
/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
  /// Element type of the memory access the pointers are computed for.
  Type *SourceElementTy;

public:
  VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
                        GEPNoWrapFlags GEPFlags, DebugLoc DL)
      : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
        SourceElementTy(SourceElementTy) {}

  VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)

  /// Returns the offset operand added by unrolling, null if not unrolled.
  VPValue *getOffset() {
    return getNumOperands() == 2 ? getOperand(N: 1) : nullptr;
  }

  void execute(VPTransformState &State) override;

  Type *getSourceElementType() const { return SourceElementTy; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    assert(getNumOperands() <= 2 && "must have at most two operands");
    return true;
  }

  VPVectorPointerRecipe *clone() override {
    auto *Clone = new VPVectorPointerRecipe(getOperand(N: 0), SourceElementTy,
                                            getGEPNoWrapFlags(), getDebugLoc());
    if (auto *Off = getOffset())
      Clone->addOperand(Operand: Off);
    return Clone;
  }

  /// Return the cost of this VPVectorPointerRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
2200
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar values
///    per-lane based on the canonical induction.
///  * VPFirstOrderRecurrencePHIRecipe
///  * VPReductionPHIRecipe
///  * VPActiveLaneMaskPHIRecipe
///  * VPEVLBasedIVPHIRecipe
class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
                                            public VPPhiAccessors {
protected:
  VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
                    VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
      : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}

  /// Used by VPPhiAccessors to access the incoming values via this recipe.
  const VPRecipeBase *getAsRecipe() const override { return this; }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast. All
  /// header-phi recipe IDs form a contiguous range.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    return isa<VPHeaderPHIRecipe>(Val: V->getDefiningRecipe());
  }
  static inline bool classof(const VPSingleDefRecipe *R) {
    return isa<VPHeaderPHIRecipe>(Val: static_cast<const VPRecipeBase *>(R));
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

  /// Return the cost of this header phi recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(N: 1);
  }

  /// Update the incoming value from the loop backedge.
  void setBackedgeValue(VPValue *V) { setOperand(I: 1, New: V); }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override = 0;
#endif
};
2286
/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
/// VPWidenPointerInductionRecipe), providing shared functionality, including
/// retrieving the step value, induction descriptor and original phi node.
class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
  /// Descriptor of the original scalar induction.
  const InductionDescriptor &IndDesc;

public:
  VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
                         VPValue *Step, const InductionDescriptor &IndDesc,
                         DebugLoc DL)
      : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
    // Step becomes operand 1; operand 0 is the start value set by the base
    // class.
    addOperand(Operand: Step);
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
           R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
  }

  static inline bool classof(const VPValue *V) {
    auto *R = V->getDefiningRecipe();
    return R && classof(R);
  }

  static inline bool classof(const VPSingleDefRecipe *R) {
    return classof(R: static_cast<const VPRecipeBase *>(R));
  }

  void execute(VPTransformState &State) override = 0;

  /// Returns the start value of the induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(N: 1); }
  const VPValue *getStepValue() const { return getOperand(N: 1); }

  /// Update the step value of the recipe.
  void setStepValue(VPValue *V) { setOperand(I: 1, New: V); }

  /// Returns the third operand, added by the derived recipes (the VF, or the
  /// number of unrolled elements for pointer inductions).
  VPValue *getVFValue() { return getOperand(N: 2); }
  const VPValue *getVFValue() const { return getOperand(N: 2); }

  /// Returns the number of incoming values, also number of incoming blocks.
  /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
  /// incoming value, its start value.
  unsigned getNumIncoming() const override { return 1; }

  /// Returns the underlying PHINode if one exists, or null otherwise.
  PHINode *getPHINode() const {
    return cast_if_present<PHINode>(Val: getUnderlyingValue());
  }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // The recipe creates its own wide start value, so it only requests the
    // first lane of the operand.
    // TODO: Remove once creating the start value is modeled separately.
    return Op == getStartValue() || Op == getStepValue();
  }
};
2367
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values. This is an abstract recipe and must be
/// converted to concrete recipes before executing.
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
                                      public VPIRFlags {
  /// Optional truncate of the original induction; null if not truncated.
  TruncInst *Trunc;

  // If this recipe is unrolled it will have 2 additional operands.
  bool isUnrolled() const { return getNumOperands() == 5; }

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc,
                                const VPIRFlags &Flags, DebugLoc DL)
      : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
                               Start, Step, IndDesc, DL),
        VPIRFlags(Flags), Trunc(nullptr) {
    addOperand(Operand: VF);
  }

  /// As above, but for an induction whose result is truncated by \p Trunc.
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
                                VPValue *VF, const InductionDescriptor &IndDesc,
                                TruncInst *Trunc, const VPIRFlags &Flags,
                                DebugLoc DL)
      : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
                               Start, Step, IndDesc, DL),
        VPIRFlags(Flags), Trunc(Trunc) {
    addOperand(Operand: VF);
    // The truncate is expected to carry no metadata that needs propagating;
    // the (void) silences the unused-variable warning in NDEBUG builds.
    SmallVector<std::pair<unsigned, MDNode *>> Metadata;
    (void)Metadata;
    if (Trunc)
      getMetadataToPropagate(Inst: Trunc, Metadata);
    assert(Metadata.empty() && "unexpected metadata on Trunc");
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VPWidenIntOrFpInductionRecipe *clone() override {
    return new VPWidenIntOrFpInductionRecipe(
        getPHINode(), getStartValue(), getStepValue(), getVFValue(),
        getInductionDescriptor(), Trunc, *this, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)

  /// Abstract recipe: must be expanded before execution.
  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be expanded via "
                     "expandVPWidenIntOrFpInductionRecipe");
  }

  /// Returns the start value of the induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }

  /// If the recipe has been unrolled, return the VPValue for the induction
  /// increment, otherwise return null.
  VPValue *getSplatVFValue() const {
    return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr;
  }

  /// Returns the number of incoming values, also number of incoming blocks.
  /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
  /// incoming value, its start value.
  unsigned getNumIncoming() const override { return 1; }

  /// Returns the first defined value as TruncInst, if it is one or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1) and has the
  /// same type as the canonical induction.
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : getStartValue()->getType();
  }

  /// Returns the VPValue representing the value of this induction at
  /// the last unrolled part, if it exists. Returns itself if unrolling did not
  /// take place.
  VPValue *getLastUnrolledPartOperand() {
    return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
2461
2462class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
2463public:
2464 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2465 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2466 /// VF*UF.
2467 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2468 VPValue *NumUnrolledElems,
2469 const InductionDescriptor &IndDesc, DebugLoc DL)
2470 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2471 Start, Step, IndDesc, DL) {
2472 addOperand(Operand: NumUnrolledElems);
2473 }
2474
2475 ~VPWidenPointerInductionRecipe() override = default;
2476
2477 VPWidenPointerInductionRecipe *clone() override {
2478 return new VPWidenPointerInductionRecipe(
2479 cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1),
2480 getOperand(N: 2), getInductionDescriptor(), getDebugLoc());
2481 }
2482
2483 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2484
2485 /// Generate vector values for the pointer induction.
2486 void execute(VPTransformState &State) override {
2487 llvm_unreachable("cannot execute this recipe, should be expanded via "
2488 "expandVPWidenPointerInduction");
2489 };
2490
2491 /// Returns true if only scalar values will be generated.
2492 bool onlyScalarsGenerated(bool IsScalable);
2493
2494protected:
2495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2496 /// Print the recipe.
2497 void printRecipe(raw_ostream &O, const Twine &Indent,
2498 VPSlotTracker &SlotTracker) const override;
2499#endif
2500};
2501
/// A recipe for widened phis. Incoming values are operands of the recipe and
/// their operand index corresponds to the incoming predecessor block. If the
/// recipe is placed in an entry block to a (non-replicate) region, it must have
/// exactly 2 incoming values, the first from the predecessor of the region and
/// the second from the exiting block of the region.
class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
                                           public VPPhiAccessors {
  /// Name to use for the generated IR instruction for the widened phi.
  std::string Name;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
  /// debug location \p DL.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
                   DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
      : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
        Name(Name.str()) {
    if (Start)
      addOperand(Operand: Start);
  }

  /// Clone the recipe, copying all incoming values beyond the start value.
  VPWidenPHIRecipe *clone() override {
    auto *C = new VPWidenPHIRecipe(cast<PHINode>(Val: getUnderlyingValue()),
                                   getOperand(N: 0), getDebugLoc(), Name);
    for (VPValue *Op : llvm::drop_begin(RangeOrContainer: operands()))
      C->addOperand(Operand: Op);
    return C;
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif

  /// Used by VPPhiAccessors to access the incoming values via this recipe.
  const VPRecipeBase *getAsRecipe() const override { return this; }
};
2551
2552/// A recipe for handling first-order recurrence phis. The start value is the
2553/// first operand of the recipe and the incoming value from the backedge is the
2554/// second operand.
2555struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2556 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start,
2557 VPValue &BackedgeValue)
2558 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2559 &Start) {
2560 addOperand(Operand: &BackedgeValue);
2561 }
2562
2563 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2564
2565 VPFirstOrderRecurrencePHIRecipe *clone() override {
2566 return new VPFirstOrderRecurrencePHIRecipe(
2567 cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0), *getOperand(N: 1));
2568 }
2569
2570 void execute(VPTransformState &State) override;
2571
2572 /// Return the cost of this first-order recurrence phi recipe.
2573 InstructionCost computeCost(ElementCount VF,
2574 VPCostContext &Ctx) const override;
2575
2576 /// Returns true if the recipe only uses the first lane of operand \p Op.
2577 bool usesFirstLaneOnly(const VPValue *Op) const override {
2578 assert(is_contained(operands(), Op) &&
2579 "Op must be an operand of the recipe");
2580 return Op == getStartValue();
2581 }
2582
2583protected:
2584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2585 /// Print the recipe.
2586 void printRecipe(raw_ostream &O, const Twine &Indent,
2587 VPSlotTracker &SlotTracker) const override;
2588#endif
2589};
2590
/// Possible variants of a reduction.

/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
struct RdxUnordered {
  unsigned VFScaleFactor;
};
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Map (\p InLoop, \p Ordered, \p ScaleFactor) to the matching ReductionStyle
/// alternative. Ordered reductions must also be in-loop; \p ScaleFactor is
/// only meaningful for unordered reductions.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  if (Ordered)
    return RdxOrdered{};
  if (!InLoop)
    return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
  return RdxInLoop{};
}
2613
2614/// A recipe for handling reduction phis. The start value is the first operand
2615/// of the recipe and the incoming value from the backedge is the second
2616/// operand.
2617class VPReductionPHIRecipe : public VPHeaderPHIRecipe, public VPIRFlags {
2618 /// The recurrence kind of the reduction.
2619 const RecurKind Kind;
2620
2621 ReductionStyle Style;
2622
2623 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2624 /// patterns for argmin/argmax).
2625 /// TODO: Also support cases where the phi itself has a single use, but its
2626 /// compare has multiple uses.
2627 bool HasUsesOutsideReductionChain;
2628
2629public:
2630 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2631 VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
2632 VPValue &BackedgeValue, ReductionStyle Style,
2633 const VPIRFlags &Flags,
2634 bool HasUsesOutsideReductionChain = false)
2635 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2636 VPIRFlags(Flags), Kind(Kind), Style(Style),
2637 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2638 addOperand(Operand: &BackedgeValue);
2639 }
2640
2641 ~VPReductionPHIRecipe() override = default;
2642
2643 VPReductionPHIRecipe *clone() override {
2644 return new VPReductionPHIRecipe(
2645 dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), getRecurrenceKind(),
2646 *getOperand(N: 0), *getBackedgeValue(), Style, *this,
2647 HasUsesOutsideReductionChain);
2648 }
2649
2650 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2651
2652 /// Generate the phi/select nodes.
2653 void execute(VPTransformState &State) override;
2654
2655 /// Get the factor that the VF of this recipe's output should be scaled by, or
2656 /// 1 if it isn't scaled.
2657 unsigned getVFScaleFactor() const {
2658 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
2659 return Partial ? Partial->VFScaleFactor : 1;
2660 }
2661
2662 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2663 /// > 1.
2664 void setVFScaleFactor(unsigned ScaleFactor) {
2665 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2666 Style = RdxUnordered{.VFScaleFactor: ScaleFactor};
2667 }
2668
2669 /// Returns the number of incoming values, also number of incoming blocks.
2670 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2671 /// incoming value, its start value.
2672 unsigned getNumIncoming() const override { return 2; }
2673
2674 /// Returns the recurrence kind of the reduction.
2675 RecurKind getRecurrenceKind() const { return Kind; }
2676
2677 /// Returns true, if the phi is part of an ordered reduction.
2678 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); }
2679
2680 /// Returns true if the phi is part of an in-loop reduction.
2681 bool isInLoop() const {
2682 return std::holds_alternative<RdxInLoop>(v: Style) ||
2683 std::holds_alternative<RdxOrdered>(v: Style);
2684 }
2685
2686 /// Returns true if the reduction outputs a vector with a scaled down VF.
2687 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2688
2689 /// Returns true, if the phi is part of a multi-use reduction.
2690 bool hasUsesOutsideReductionChain() const {
2691 return HasUsesOutsideReductionChain;
2692 }
2693
2694 /// Returns true if the recipe only uses the first lane of operand \p Op.
2695 bool usesFirstLaneOnly(const VPValue *Op) const override {
2696 assert(is_contained(operands(), Op) &&
2697 "Op must be an operand of the recipe");
2698 return isOrdered() || isInLoop();
2699 }
2700
2701protected:
2702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2703 /// Print the recipe.
2704 void printRecipe(raw_ostream &O, const Twine &Indent,
2705 VPSlotTracker &SlotTracker) const override;
2706#endif
2707};
2708
2709/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2710/// instructions.
2711class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPRecipeWithIRFlags {
2712public:
2713 /// The blend operation is a User of the incoming values and of their
2714 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2715 /// be omitted (implied by passing an odd number of operands) in which case
2716 /// all other incoming values are merged into it.
2717 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands,
2718 const VPIRFlags &Flags, DebugLoc DL)
2719 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2720 assert(Operands.size() >= 2 && "Expected at least two operands!");
2721 setUnderlyingValue(Phi);
2722 }
2723
2724 VPBlendRecipe *clone() override {
2725 return new VPBlendRecipe(cast_or_null<PHINode>(Val: getUnderlyingValue()),
2726 operands(), *this, getDebugLoc());
2727 }
2728
2729 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2730
2731 /// A normalized blend is one that has an odd number of operands, whereby the
2732 /// first operand does not have an associated mask.
2733 bool isNormalized() const { return getNumOperands() % 2; }
2734
2735 /// Return the number of incoming values, taking into account when normalized
2736 /// the first incoming value will have no mask.
2737 unsigned getNumIncomingValues() const {
2738 return (getNumOperands() + isNormalized()) / 2;
2739 }
2740
2741 /// Return incoming value number \p Idx.
2742 VPValue *getIncomingValue(unsigned Idx) const {
2743 return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized());
2744 }
2745
2746 /// Return mask number \p Idx.
2747 VPValue *getMask(unsigned Idx) const {
2748 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2749 return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized());
2750 }
2751
2752 /// Set mask number \p Idx to \p V.
2753 void setMask(unsigned Idx, VPValue *V) {
2754 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2755 Idx == 0 ? setOperand(I: 1, New: V) : setOperand(I: Idx * 2 + !isNormalized(), New: V);
2756 }
2757
2758 void execute(VPTransformState &State) override {
2759 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2760 }
2761
2762 /// Return the cost of this VPWidenMemoryRecipe.
2763 InstructionCost computeCost(ElementCount VF,
2764 VPCostContext &Ctx) const override;
2765
2766 /// Returns true if the recipe only uses the first lane of operand \p Op.
2767 bool usesFirstLaneOnly(const VPValue *Op) const override {
2768 assert(is_contained(operands(), Op) &&
2769 "Op must be an operand of the recipe");
2770 // Recursing through Blend recipes only, must terminate at header phi's the
2771 // latest.
2772 return all_of(Range: users(),
2773 P: [this](VPUser *U) { return U->usesFirstLaneOnly(Op: this); });
2774 }
2775
2776protected:
2777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2778 /// Print the recipe.
2779 void printRecipe(raw_ostream &O, const Twine &Indent,
2780 VPSlotTracker &SlotTracker) const override;
2781#endif
2782};
2783
/// A common base class for interleaved memory operations.
/// An Interleaved memory operation is a memory access method that combines
/// multiple strided loads/stores into a single wide load/store with shuffles.
/// The first operand is the start address. The optional operands are, in order,
/// the stored values and the mask.
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
                                           public VPIRMetadata {
  /// The interleave group this recipe materializes.
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires a
  /// mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or if
  /// unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

protected:
  /// Create a recipe for interleave group \p IG. For a load group (empty
  /// \p StoredValues), one VPRecipeValue is created per non-void member
  /// instruction; for a store group, the stored values are appended as
  /// operands. The optional \p Mask, if non-null, is added as the last
  /// operand.
  VPInterleaveBase(const unsigned char SC,
                   const InterleaveGroup<Instruction> *IG,
                   ArrayRef<VPValue *> Operands,
                   ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                   bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
      : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    // TODO: extend the masked interleaved-group support to reversed access.
    assert((!Mask || !IG->isReverse()) &&
           "Reversed masked interleave-group not supported.");
    if (StoredValues.empty()) {
      // Load group: each member instruction defines a result value of this
      // recipe.
      for (unsigned I = 0; I < IG->getFactor(); ++I)
        if (Instruction *Inst = IG->getMember(Index: I)) {
          assert(!Inst->getType()->isVoidTy() && "must have result");
          new VPRecipeValue(this, Inst);
        }
    } else {
      // Store group: the stored values become additional operands.
      for (auto *SV : StoredValues)
        addOperand(Operand: SV);
    }
    if (Mask) {
      HasMask = true;
      addOperand(Operand: Mask);
    }
  }

public:
  /// Cloning is deferred to the concrete subclasses.
  VPInterleaveBase *clone() override = 0;

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
           R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(Val: U);
    return R && classof(R);
  }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(N: 0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and the last operand.
    return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr;
  }

  /// Return true if the access needs a mask because of the gaps.
  bool needsMaskForGaps() const { return NeedsMaskForGaps; }

  /// Return the interleave group this recipe materializes.
  const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; }

  /// Return the insert position of the interleave group.
  Instruction *getInsertPos() const { return IG->getInsertPos(); }

  /// The base class is abstract; only subclasses are executed.
  void execute(VPTransformState &State) override {
    llvm_unreachable("VPInterleaveBase should not be instantiated.");
  }

  /// Return the cost of this recipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override = 0;

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  virtual unsigned getNumStoreOperands() const = 0;

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The stored values sit directly before the optional trailing mask.
    return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
            getNumStoreOperands()};
  }
};
2882
2883/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2884/// or stores into one wide load/store and shuffles. The first operand of a
2885/// VPInterleave recipe is the address, followed by the stored values, followed
2886/// by an optional mask.
2887class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2888public:
2889 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2890 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2891 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2892 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2893 Mask, NeedsMaskForGaps, MD, DL) {}
2894
2895 ~VPInterleaveRecipe() override = default;
2896
2897 VPInterleaveRecipe *clone() override {
2898 return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
2899 getStoredValues(), getMask(),
2900 needsMaskForGaps(), *this, getDebugLoc());
2901 }
2902
2903 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2904
2905 /// Generate the wide load or store, and shuffles.
2906 void execute(VPTransformState &State) override;
2907
2908 bool usesFirstLaneOnly(const VPValue *Op) const override {
2909 assert(is_contained(operands(), Op) &&
2910 "Op must be an operand of the recipe");
2911 return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op);
2912 }
2913
2914 unsigned getNumStoreOperands() const override {
2915 return getNumOperands() - (getMask() ? 2 : 1);
2916 }
2917
2918protected:
2919#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2920 /// Print the recipe.
2921 void printRecipe(raw_ostream &O, const Twine &Indent,
2922 VPSlotTracker &SlotTracker) const override;
2923#endif
2924};
2925
2926/// A recipe for interleaved memory operations with vector-predication
2927/// intrinsics. The first operand is the address, the second operand is the
2928/// explicit vector length. Stored values and mask are optional operands.
2929class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2930public:
2931 VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2932 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2933 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2934 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2935 R.getDebugLoc()) {
2936 assert(!getInterleaveGroup()->isReverse() &&
2937 "Reversed interleave-group with tail folding is not supported.");
2938 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2939 "supported for scalable vector.");
2940 }
2941
2942 ~VPInterleaveEVLRecipe() override = default;
2943
2944 VPInterleaveEVLRecipe *clone() override {
2945 llvm_unreachable("cloning not implemented yet");
2946 }
2947
2948 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
2949
2950 /// The VPValue of the explicit vector length.
2951 VPValue *getEVL() const { return getOperand(N: 1); }
2952
2953 /// Generate the wide load or store, and shuffles.
2954 void execute(VPTransformState &State) override;
2955
2956 /// The recipe only uses the first lane of the address, and EVL operand.
2957 bool usesFirstLaneOnly(const VPValue *Op) const override {
2958 assert(is_contained(operands(), Op) &&
2959 "Op must be an operand of the recipe");
2960 return (Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op)) ||
2961 Op == getEVL();
2962 }
2963
2964 unsigned getNumStoreOperands() const override {
2965 return getNumOperands() - (getMask() ? 3 : 2);
2966 }
2967
2968protected:
2969#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2970 /// Print the recipe.
2971 void printRecipe(raw_ostream &O, const Twine &Indent,
2972 VPSlotTracker &SlotTracker) const override;
2973#endif
2974};
2975
2976/// A recipe to represent inloop, ordered or partial reduction operations. It
2977/// performs a reduction on a vector operand into a scalar (vector in the case
2978/// of a partial reduction) value, and adds the result to a chain. The Operands
2979/// are {ChainOp, VecOp, [Condition]}.
2980class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
2981
2982 /// The recurrence kind for the reduction in question.
2983 RecurKind RdxKind;
2984 /// Whether the reduction is conditional.
2985 bool IsConditional = false;
2986 ReductionStyle Style;
2987
2988protected:
2989 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2990 FastMathFlags FMFs, Instruction *I,
2991 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2992 ReductionStyle Style, DebugLoc DL)
2993 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2994 Style(Style) {
2995 if (CondOp) {
2996 IsConditional = true;
2997 addOperand(Operand: CondOp);
2998 }
2999 setUnderlyingValue(I);
3000 }
3001
3002public:
3003 VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
3004 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3005 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3006 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3007 {ChainOp, VecOp}, CondOp, Style, DL) {}
3008
3009 VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
3010 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3011 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3012 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3013 {ChainOp, VecOp}, CondOp, Style, DL) {}
3014
3015 ~VPReductionRecipe() override = default;
3016
3017 VPReductionRecipe *clone() override {
3018 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3019 getUnderlyingInstr(), getChainOp(), getVecOp(),
3020 getCondOp(), Style, getDebugLoc());
3021 }
3022
3023 static inline bool classof(const VPRecipeBase *R) {
3024 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3025 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3026 }
3027
3028 static inline bool classof(const VPUser *U) {
3029 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3030 return R && classof(R);
3031 }
3032
3033 static inline bool classof(const VPValue *VPV) {
3034 const VPRecipeBase *R = VPV->getDefiningRecipe();
3035 return R && classof(R);
3036 }
3037
3038 static inline bool classof(const VPSingleDefRecipe *R) {
3039 return classof(R: static_cast<const VPRecipeBase *>(R));
3040 }
3041
3042 /// Generate the reduction in the loop.
3043 void execute(VPTransformState &State) override;
3044
3045 /// Return the cost of VPReductionRecipe.
3046 InstructionCost computeCost(ElementCount VF,
3047 VPCostContext &Ctx) const override;
3048
3049 /// Return the recurrence kind for the in-loop reduction.
3050 RecurKind getRecurrenceKind() const { return RdxKind; }
3051 /// Return true if the in-loop reduction is ordered.
3052 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); };
3053 /// Return true if the in-loop reduction is conditional.
3054 bool isConditional() const { return IsConditional; };
3055 /// Returns true if the reduction outputs a vector with a scaled down VF.
3056 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3057 /// Returns true if the reduction is in-loop.
3058 bool isInLoop() const {
3059 return std::holds_alternative<RdxInLoop>(v: Style) ||
3060 std::holds_alternative<RdxOrdered>(v: Style);
3061 }
3062 /// The VPValue of the scalar Chain being accumulated.
3063 VPValue *getChainOp() const { return getOperand(N: 0); }
3064 /// The VPValue of the vector value to be reduced.
3065 VPValue *getVecOp() const { return getOperand(N: 1); }
3066 /// The VPValue of the condition for the block.
3067 VPValue *getCondOp() const {
3068 return isConditional() ? getOperand(N: getNumOperands() - 1) : nullptr;
3069 }
3070 /// Get the factor that the VF of this recipe's output should be scaled by, or
3071 /// 1 if it isn't scaled.
3072 unsigned getVFScaleFactor() const {
3073 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
3074 return Partial ? Partial->VFScaleFactor : 1;
3075 }
3076
3077protected:
3078#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3079 /// Print the recipe.
3080 void printRecipe(raw_ostream &O, const Twine &Indent,
3081 VPSlotTracker &SlotTracker) const override;
3082#endif
3083};
3084
3085/// A recipe to represent inloop reduction operations with vector-predication
3086/// intrinsics, performing a reduction on a vector operand with the explicit
3087/// vector length (EVL) into a scalar value, and adding the result to a chain.
3088/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3089class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
3090public:
3091 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
3092 DebugLoc DL = DebugLoc::getUnknown())
3093 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3094 R.getFastMathFlags(),
3095 cast_or_null<Instruction>(Val: R.getUnderlyingValue()),
3096 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3097 getReductionStyle(/*InLoop=*/InLoop: true, Ordered: R.isOrdered(), ScaleFactor: 1),
3098 DL) {}
3099
3100 ~VPReductionEVLRecipe() override = default;
3101
3102 VPReductionEVLRecipe *clone() override {
3103 llvm_unreachable("cloning not implemented yet");
3104 }
3105
3106 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3107
3108 /// Generate the reduction in the loop
3109 void execute(VPTransformState &State) override;
3110
3111 /// The VPValue of the explicit vector length.
3112 VPValue *getEVL() const { return getOperand(N: 2); }
3113
3114 /// Returns true if the recipe only uses the first lane of operand \p Op.
3115 bool usesFirstLaneOnly(const VPValue *Op) const override {
3116 assert(is_contained(operands(), Op) &&
3117 "Op must be an operand of the recipe");
3118 return Op == getEVL();
3119 }
3120
3121protected:
3122#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3123 /// Print the recipe.
3124 void printRecipe(raw_ostream &O, const Twine &Indent,
3125 VPSlotTracker &SlotTracker) const override;
3126#endif
3127};
3128
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// a single scalar, only one copy, per lane zero, will be generated.
class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
                                            public VPIRMetadata {
  /// Indicator if only a single replica per lane is needed.
  bool IsSingleScalar;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  /// Create a replicating recipe for \p I with operands \p Operands. If \p
  /// Mask is non-null, the recipe is predicated and the mask becomes the last
  /// operand.
  VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
                    bool IsSingleScalar, VPValue *Mask = nullptr,
                    const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
                    DebugLoc DL = DebugLoc::getUnknown())
      : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
        IsPredicated(Mask) {
    setUnderlyingValue(I);
    if (Mask)
      addOperand(Operand: Mask);
  }

  ~VPReplicateRecipe() override = default;

  VPReplicateRecipe *clone() override {
    auto *Copy = new VPReplicateRecipe(
        getUnderlyingInstr(), operands(), IsSingleScalar,
        isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
    // NOTE(review): flags were already passed to the constructor via *this;
    // presumably transferFlags copies additional flag state — TODO confirm
    // whether this call is redundant.
    Copy->transferFlags(Other&: *this);
    return Copy;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPReplicateRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override;

  /// Returns true if only a single replica (for lane zero) is generated.
  bool isSingleScalar() const { return IsSingleScalar; }

  /// Returns true if the replicas are predicated by a mask.
  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // A single-scalar replicate only ever reads lane zero of its operands.
    return isSingleScalar();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
  /// in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
    // The mask is always the last operand (added after Operands in the ctor).
    return getOperand(N: getNumOperands() - 1);
  }

  /// Return the opcode of the underlying (replicated) instruction.
  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3213
3214/// A recipe for generating conditional branches on the bits of a mask.
3215class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase {
3216public:
3217 VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
3218 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3219
3220 VPBranchOnMaskRecipe *clone() override {
3221 return new VPBranchOnMaskRecipe(getOperand(N: 0), getDebugLoc());
3222 }
3223
3224 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3225
3226 /// Generate the extraction of the appropriate bit from the block mask and the
3227 /// conditional branch.
3228 void execute(VPTransformState &State) override;
3229
3230 /// Return the cost of this VPBranchOnMaskRecipe.
3231 InstructionCost computeCost(ElementCount VF,
3232 VPCostContext &Ctx) const override;
3233
3234#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3235 /// Print the recipe.
3236 void printRecipe(raw_ostream &O, const Twine &Indent,
3237 VPSlotTracker &SlotTracker) const override {
3238 O << Indent << "BRANCH-ON-MASK ";
3239 printOperands(O, SlotTracker);
3240 }
3241#endif
3242
3243 /// Returns true if the recipe uses scalars of operand \p Op.
3244 bool usesScalars(const VPValue *Op) const override {
3245 assert(is_contained(operands(), Op) &&
3246 "Op must be an operand of the recipe");
3247 return true;
3248 }
3249};
3250
3251/// A recipe to combine multiple recipes into a single 'expression' recipe,
3252/// which should be considered a single entity for cost-modeling and transforms.
3253/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3254/// expression recipes, before execute. The individual expression recipes are
3255/// completely disconnected from the def-use graph of other recipes not part of
3256/// the expression. Def-use edges between pairs of expression recipes remain
3257/// intact, whereas every edge between an expression recipe and a recipe outside
3258/// the expression is elevated to connect the non-expression recipe with the
3259/// VPExpressionRecipe itself.
3260class VPExpressionRecipe : public VPSingleDefRecipe {
3261 /// Recipes included in this VPExpressionRecipe. This could contain
3262 /// duplicates.
3263 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3264
3265 /// Temporary VPValues used for external operands of the expression, i.e.
3266 /// operands not defined by recipes in the expression.
3267 SmallVector<VPValue *> LiveInPlaceholders;
3268
3269 enum class ExpressionTypes {
3270 /// Represents an inloop extended reduction operation, performing a
3271 /// reduction on an extended vector operand into a scalar value, and adding
3272 /// the result to a chain.
3273 ExtendedReduction,
3274 /// Represent an inloop multiply-accumulate reduction, multiplying the
3275 /// extended vector operands, performing a reduction.add on the result, and
3276 /// adding the scalar result to a chain.
3277 ExtMulAccReduction,
3278 /// Represent an inloop multiply-accumulate reduction, multiplying the
3279 /// vector operands, performing a reduction.add on the result, and adding
3280 /// the scalar result to a chain.
3281 MulAccReduction,
3282 /// Represent an inloop multiply-accumulate reduction, multiplying the
3283 /// extended vector operands, negating the multiplication, performing a
3284 /// reduction.add on the result, and adding the scalar result to a chain.
3285 ExtNegatedMulAccReduction,
3286 };
3287
3288 /// Type of the expression.
3289 ExpressionTypes ExpressionType;
3290
3291 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3292 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3293 /// in the expression) are replaced by temporary VPValues and the original
3294 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3295 /// as needed (excluding last) to ensure they are only used by other recipes
3296 /// in the expression.
3297 VPExpressionRecipe(ExpressionTypes ExpressionType,
3298 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3299
3300public:
3301 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
3302 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3303 VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
3304 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3305 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3306 VPWidenRecipe *Mul, VPReductionRecipe *Red)
3307 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3308 {Ext0, Ext1, Mul, Red}) {}
3309 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3310 VPWidenRecipe *Mul, VPWidenRecipe *Sub,
3311 VPReductionRecipe *Red)
3312 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3313 {Ext0, Ext1, Mul, Sub, Red}) {
3314 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3315 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3316 "Expected an add reduction");
3317 assert(getNumOperands() >= 3 && "Expected at least three operands");
3318 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(Val: getOperand(N: 2));
3319 assert(SubConst && SubConst->isZero() &&
3320 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3321 }
3322
3323 ~VPExpressionRecipe() override {
3324 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3325 for (auto *R : reverse(C&: ExpressionRecipes)) {
3326 if (ExpressionRecipesSeen.insert(Ptr: R).second)
3327 delete R;
3328 }
3329 for (VPValue *T : LiveInPlaceholders)
3330 delete T;
3331 }
3332
3333 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3334
3335 VPExpressionRecipe *clone() override {
3336 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3337 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3338 for (auto *R : ExpressionRecipes)
3339 NewExpressiondRecipes.push_back(Elt: R->clone());
3340 for (auto *New : NewExpressiondRecipes) {
3341 for (const auto &[Idx, Old] : enumerate(First&: ExpressionRecipes))
3342 New->replaceUsesOfWith(From: Old, To: NewExpressiondRecipes[Idx]);
3343 // Update placeholder operands in the cloned recipe to use the external
3344 // operands, to be internalized when the cloned expression is constructed.
3345 for (const auto &[Placeholder, OutsideOp] :
3346 zip(t&: LiveInPlaceholders, u: operands()))
3347 New->replaceUsesOfWith(From: Placeholder, To: OutsideOp);
3348 }
3349 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3350 }
3351
3352 /// Return the VPValue to use to infer the result type of the recipe.
3353 VPValue *getOperandOfResultType() const {
3354 unsigned OpIdx =
3355 cast<VPReductionRecipe>(Val: ExpressionRecipes.back())->isConditional() ? 2
3356 : 1;
3357 return getOperand(N: getNumOperands() - OpIdx);
3358 }
3359
3360 /// Insert the recipes of the expression back into the VPlan, directly before
3361 /// the current recipe. Leaves the expression recipe empty, which must be
3362 /// removed before codegen.
3363 void decompose();
3364
3365 unsigned getVFScaleFactor() const {
3366 auto *PR = dyn_cast<VPReductionRecipe>(Val: ExpressionRecipes.back());
3367 return PR ? PR->getVFScaleFactor() : 1;
3368 }
3369
3370 /// Method for generating code, must not be called as this recipe is abstract.
3371 void execute(VPTransformState &State) override {
3372 llvm_unreachable("recipe must be removed before execute");
3373 }
3374
3375 InstructionCost computeCost(ElementCount VF,
3376 VPCostContext &Ctx) const override;
3377
3378 /// Returns true if this expression contains recipes that may read from or
3379 /// write to memory.
3380 bool mayReadOrWriteMemory() const;
3381
3382 /// Returns true if this expression contains recipes that may have side
3383 /// effects.
3384 bool mayHaveSideEffects() const;
3385
3386 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3387 bool isSingleScalar() const;
3388
3389protected:
3390#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3391 /// Print the recipe.
3392 void printRecipe(raw_ostream &O, const Twine &Indent,
3393 VPSlotTracker &SlotTracker) const override;
3394#endif
3395};
3396
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
      : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
  ~VPPredInstPHIRecipe() override = default;

  /// Clone this recipe, reusing the predicated value and debug location.
  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(N: 0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)

  /// Generates phi nodes for live-outs (from a replicate region) as needed to
  /// retain SSA form.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPPredInstPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3441
3442/// A common base class for widening memory operations. An optional mask can be
3443/// provided as the last operand.
3444class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
3445 public VPIRMetadata {
3446protected:
3447 Instruction &Ingredient;
3448
3449 /// Alignment information for this memory access.
3450 Align Alignment;
3451
3452 /// Whether the accessed addresses are consecutive.
3453 bool Consecutive;
3454
3455 /// Whether the consecutive accessed addresses are in reverse order.
3456 bool Reverse;
3457
3458 /// Whether the memory access is masked.
3459 bool IsMasked = false;
3460
3461 void setMask(VPValue *Mask) {
3462 assert(!IsMasked && "cannot re-set mask");
3463 if (!Mask)
3464 return;
3465 addOperand(Operand: Mask);
3466 IsMasked = true;
3467 }
3468
3469 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3470 std::initializer_list<VPValue *> Operands,
3471 bool Consecutive, bool Reverse,
3472 const VPIRMetadata &Metadata, DebugLoc DL)
3473 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3474 Alignment(getLoadStoreAlignment(I: &I)), Consecutive(Consecutive),
3475 Reverse(Reverse) {
3476 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3477 assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
3478 "Reversed acccess without VPVectorEndPointerRecipe address?");
3479 }
3480
3481public:
3482 VPWidenMemoryRecipe *clone() override {
3483 llvm_unreachable("cloning not supported");
3484 }
3485
3486 static inline bool classof(const VPRecipeBase *R) {
3487 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3488 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3489 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3490 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3491 }
3492
3493 static inline bool classof(const VPUser *U) {
3494 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3495 return R && classof(R);
3496 }
3497
3498 /// Return whether the loaded-from / stored-to addresses are consecutive.
3499 bool isConsecutive() const { return Consecutive; }
3500
3501 /// Return whether the consecutive loaded/stored addresses are in reverse
3502 /// order.
3503 bool isReverse() const { return Reverse; }
3504
3505 /// Return the address accessed by this recipe.
3506 VPValue *getAddr() const { return getOperand(N: 0); }
3507
3508 /// Returns true if the recipe is masked.
3509 bool isMasked() const { return IsMasked; }
3510
3511 /// Return the mask used by this recipe. Note that a full mask is represented
3512 /// by a nullptr.
3513 VPValue *getMask() const {
3514 // Mask is optional and therefore the last operand.
3515 return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
3516 }
3517
3518 /// Returns the alignment of the memory access.
3519 Align getAlign() const { return Alignment; }
3520
3521 /// Generate the wide load/store.
3522 void execute(VPTransformState &State) override {
3523 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3524 }
3525
3526 /// Return the cost of this VPWidenMemoryRecipe.
3527 InstructionCost computeCost(ElementCount VF,
3528 VPCostContext &Ctx) const override;
3529
3530 Instruction &getIngredient() const { return Ingredient; }
3531};
3532
3533/// A recipe for widening load operations, using the address to load from and an
3534/// optional mask.
3535struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
3536 public VPRecipeValue {
3537 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
3538 bool Consecutive, bool Reverse,
3539 const VPIRMetadata &Metadata, DebugLoc DL)
3540 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3541 Consecutive, Reverse, Metadata, DL),
3542 VPRecipeValue(this, &Load) {
3543 setMask(Mask);
3544 }
3545
3546 VPWidenLoadRecipe *clone() override {
3547 return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(),
3548 getMask(), Consecutive, Reverse, *this,
3549 getDebugLoc());
3550 }
3551
3552 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3553
3554 /// Generate a wide load or gather.
3555 void execute(VPTransformState &State) override;
3556
3557 /// Returns true if the recipe only uses the first lane of operand \p Op.
3558 bool usesFirstLaneOnly(const VPValue *Op) const override {
3559 assert(is_contained(operands(), Op) &&
3560 "Op must be an operand of the recipe");
3561 // Widened, consecutive loads operations only demand the first lane of
3562 // their address.
3563 return Op == getAddr() && isConsecutive();
3564 }
3565
3566protected:
3567#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3568 /// Print the recipe.
3569 void printRecipe(raw_ostream &O, const Twine &Indent,
3570 VPSlotTracker &SlotTracker) const override;
3571#endif
3572};
3573
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe,
                                    public VPRecipeValue {
  /// Construct from an existing VPWidenLoadRecipe \p L, taking over its
  /// ingredient, consecutive/reverse flags, metadata and debug location, with
  /// operands {Addr, EVL} plus the optional \p Mask.
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
                       VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
                            {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
                            L.getDebugLoc()),
        VPRecipeValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(N: 1); }

  /// Generate the wide load or gather.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenLoadEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3616
3617/// A recipe for widening store operations, using the stored value, the address
3618/// to store to and an optional mask.
3619struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
3620 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3621 VPValue *Mask, bool Consecutive, bool Reverse,
3622 const VPIRMetadata &Metadata, DebugLoc DL)
3623 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3624 {Addr, StoredVal}, Consecutive, Reverse, Metadata,
3625 DL) {
3626 setMask(Mask);
3627 }
3628
3629 VPWidenStoreRecipe *clone() override {
3630 return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(),
3631 getStoredValue(), getMask(), Consecutive,
3632 Reverse, *this, getDebugLoc());
3633 }
3634
3635 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3636
3637 /// Return the value stored by this recipe.
3638 VPValue *getStoredValue() const { return getOperand(N: 1); }
3639
3640 /// Generate a wide store or scatter.
3641 void execute(VPTransformState &State) override;
3642
3643 /// Returns true if the recipe only uses the first lane of operand \p Op.
3644 bool usesFirstLaneOnly(const VPValue *Op) const override {
3645 assert(is_contained(operands(), Op) &&
3646 "Op must be an operand of the recipe");
3647 // Widened, consecutive stores only demand the first lane of their address,
3648 // unless the same operand is also stored.
3649 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3650 }
3651
3652protected:
3653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3654 /// Print the recipe.
3655 void printRecipe(raw_ostream &O, const Twine &Indent,
3656 VPSlotTracker &SlotTracker) const override;
3657#endif
3658};
3659
/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  /// Construct from an existing VPWidenStoreRecipe \p S, taking over its
  /// ingredient, consecutive/reverse flags, metadata and debug location, with
  /// operands {Addr, StoredVal, EVL} plus the optional \p Mask.
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
                            {Addr, StoredVal, &EVL}, S.isConsecutive(),
                            S.isReverse(), S, S.getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(N: 1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(N: 2); }

  /// Generate the wide store or scatter.
  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenStoreEVLRecipe.
  LLVM_ABI_FOR_TEST InstructionCost
  computeCost(ElementCount VF, VPCostContext &Ctx) const override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. That latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3708
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  /// The SCEV expression to expand.
  const SCEV *Expr;

public:
  VPExpandSCEVRecipe(const SCEV *Expr)
      : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}

  ~VPExpandSCEVRecipe() override = default;

  /// Clone this recipe, reusing the wrapped SCEV expression.
  VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }

  VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("SCEV expressions must be expanded before final execute");
  }

  /// Return the cost of this VPExpandSCEVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Return the SCEV expression this recipe expands.
  const SCEV *getSCEV() const { return Expr; }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3743
/// Canonical scalar induction phi of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
/// canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
                          DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  /// Clone this recipe, copying both the start and backedge values.
  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getStartValue(), getDebugLoc());
    R->addOperand(Operand: getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Returns the start value of the canonical induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }

  /// Returns the scalar type of the induction.
  Type *getScalarType() const { return getStartValue()->getType(); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool usesFirstPartOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Return the cost of this VPCanonicalIVPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3803
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
                          StartMask, DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  /// Clone this recipe, also copying the second (backedge) operand if it has
  /// already been added.
  VPActiveLaneMaskPHIRecipe *clone() override {
    auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc());
    if (getNumOperands() == 2)
      R->addOperand(Operand: getOperand(N: 1));
    return R;
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3833
/// A recipe for generating the phi node tracking the current scalar iteration
/// index. It starts at the start value of the canonical induction and gets
/// incremented by the number of scalar iterations processed by the vector loop
/// iteration. The increment does not have to be loop invariant.
class VPCurrentIterationPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
                          StartIV, DL) {}

  ~VPCurrentIterationPHIRecipe() override = default;

  VPCurrentIterationPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)

  void execute(VPTransformState &State) override {
    llvm_unreachable("cannot execute this recipe, should be replaced by a "
                     "scalar phi recipe");
  }

  /// Return the cost of this VPCurrentIterationPHIRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // For now, match the behavior of the legacy cost model.
    return 0;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const override;
#endif
};
3878
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                 public VPUnrollPartAccessor<1> {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  /// Clone this recipe, reusing the canonical IV phi operand.
  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0)));
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPWidenCanonicalIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3914
/// A recipe for converting the input value \p IV value to the corresponding
/// value of an IV with different start and step values, using Start + IV *
/// Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

  /// Name to use for the generated IR instruction for the derived IV.
  std::string Name;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
                    const Twine &Name = "")
      : VPDerivedIVRecipe(
            IndDesc.getKind(),
            dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()),
            Start, CanonicalIV, Step, Name) {}

  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
                    const FPMathOperator *FPBinOp, VPIRValue *Start,
                    VPValue *IV, VPValue *Step, const Twine &Name = "")
      : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
        Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}

  ~VPDerivedIVRecipe() override = default;

  /// Clone this recipe, reusing the induction kind, FP binary operator and
  /// operands. Note that the generated-instruction name is not preserved.
  VPDerivedIVRecipe *clone() override {
    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1),
                                 getStepValue());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1.
  /// operand) + IV (2. operand) * StepValue (3. operand).
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPDerivedIVRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Returns the scalar type of the derived IV, taken from the start value.
  Type *getScalarType() const { return getStartValue()->getType(); }

  /// Returns the start value of the derived induction.
  VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
  /// Returns the step value of the derived induction.
  VPValue *getStepValue() const { return getOperand(N: 2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
3982
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values. Before unrolling by UF the recipe represents
/// the VF*UF scalar values to be produced, or UF scalar values if only first
/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
/// operand StartIndex to all unroll parts except part 0, as the recipe
/// represents the VF scalar values (this number of values is taken from
/// State.VF rather than from the VF operand) starting at IV + StartIndex.
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  /// Binary opcode used to step the induction (e.g. add or fadd).
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs,
                        DebugLoc DL)
      : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
                            FMFs, DL),
        InductionOpcode(Opcode) {}

  /// Convenience constructor deriving the induction opcode and fast-math flags
  /// from \p IndDesc.
  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step, VPValue *VF,
                        DebugLoc DL = DebugLoc::getUnknown())
      : VPScalarIVStepsRecipe(
            IV, Step, VF, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags(),
            DL) {}

  ~VPScalarIVStepsRecipe() override = default;

  /// Clone this recipe, reusing operands, opcode, flags and debug location.
  VPScalarIVStepsRecipe *clone() override {
    return new VPScalarIVStepsRecipe(getOperand(N: 0), getOperand(N: 1),
                                     getOperand(N: 2), InductionOpcode,
                                     getFastMathFlags(), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by their users.
  void execute(VPTransformState &State) override;

  /// Return the cost of this VPScalarIVStepsRecipe.
  InstructionCost computeCost(ElementCount VF,
                              VPCostContext &Ctx) const override {
    // TODO: Compute accurate cost after retiring the legacy cost model.
    return 0;
  }

  /// Return the step value of the induction.
  VPValue *getStepValue() const { return getOperand(N: 1); }

  /// Return the number of scalars to produce per unroll part, used to compute
  /// StartIndex during unrolling.
  VPValue *getVFValue() const { return getOperand(N: 2); }

  /// Return the StartIndex, or null if known to be zero, valid only after
  /// unrolling.
  VPValue *getStartIndex() const {
    return getNumOperands() == 4 ? getOperand(N: 3) : nullptr;
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool usesFirstLaneOnly(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void printRecipe(raw_ostream &O, const Twine &Indent,
                   VPSlotTracker &SlotTracker) const override;
#endif
};
4057
/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
/// types implementing VPPhiAccessors. Used by isa<> & co.
template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
  /// Returns true if \p f is one of the recipe types implementing
  /// VPPhiAccessors.
  static inline bool isPossible(const VPRecipeBase *f) {
    // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
    return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(Val: f);
  }
};
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
template <typename SrcTy>
struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {

  using Self = CastInfo<VPPhiAccessors, SrcTy>;

  /// doCast is used by cast<>. Dispatches on the recipe ID to reach the
  /// concrete recipe type before converting to its VPPhiAccessors base.
  static inline VPPhiAccessors *doCast(SrcTy R) {
    return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
      switch (R->getVPRecipeID()) {
      case VPRecipeBase::VPInstructionSC:
        return cast<VPPhi>(R);
      case VPRecipeBase::VPIRInstructionSC:
        return cast<VPIRPhi>(R);
      case VPRecipeBase::VPWidenPHISC:
        return cast<VPWidenPHIRecipe>(R);
      default:
        return cast<VPHeaderPHIRecipe>(R);
      }
    }());
  }

  /// doCastIfPossible is used by dyn_cast<>. Returns nullptr for recipes that
  /// do not implement VPPhiAccessors.
  static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
    if (!Self::isPossible(f))
      return nullptr;
    return doCast(R: f);
  }
};
template <>
struct CastInfo<VPPhiAccessors, VPRecipeBase *>
    : CastInfoVPPhiAccessors<VPRecipeBase *> {};
template <>
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
    : CastInfoVPPhiAccessors<const VPRecipeBase *> {};
4102
/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
namespace detail {
/// Down-cast recipe \p R to its concrete type based on its recipe ID, so the
/// result can convert to the VPIRMetadata base. \p R must be one of the recipe
/// types implementing VPIRMetadata (see CastInfoVPIRMetadata::isPossible).
template <typename DstTy, typename RecipeBasePtrTy>
static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
  switch (R->getVPRecipeID()) {
  case VPRecipeBase::VPInstructionSC:
    return cast<VPInstruction>(R);
  case VPRecipeBase::VPWidenSC:
    return cast<VPWidenRecipe>(R);
  case VPRecipeBase::VPWidenCastSC:
    return cast<VPWidenCastRecipe>(R);
  case VPRecipeBase::VPWidenIntrinsicSC:
    return cast<VPWidenIntrinsicRecipe>(R);
  case VPRecipeBase::VPWidenCallSC:
    return cast<VPWidenCallRecipe>(R);
  case VPRecipeBase::VPReplicateSC:
    return cast<VPReplicateRecipe>(R);
  case VPRecipeBase::VPInterleaveSC:
  case VPRecipeBase::VPInterleaveEVLSC:
    return cast<VPInterleaveBase>(R);
  case VPRecipeBase::VPWidenLoadSC:
  case VPRecipeBase::VPWidenLoadEVLSC:
  case VPRecipeBase::VPWidenStoreSC:
  case VPRecipeBase::VPWidenStoreEVLSC:
    return cast<VPWidenMemoryRecipe>(R);
  default:
    llvm_unreachable("invalid recipe for VPIRMetadata cast");
  }
}
} // namespace detail
4134
4135/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
4136/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
template <typename DstTy, typename SrcTy>
struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
  /// Returns true if \p R is one of the recipe kinds that implement
  /// VPIRMetadata and can therefore be down-cast to it.
  static inline bool isPossible(SrcTy R) {
    // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
    // also handled in castToVPIRMetadata.
    return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
               VPWidenIntrinsicRecipe, VPWidenCallRecipe, VPReplicateRecipe,
               VPInterleaveRecipe, VPInterleaveEVLRecipe, VPWidenLoadRecipe,
               VPWidenLoadEVLRecipe, VPWidenStoreRecipe, VPWidenStoreEVLRecipe>(
        R);
  }

  using RetTy = DstTy *;

  /// doCast is used by cast<>.
  static inline RetTy doCast(SrcTy R) {
    return detail::castToVPIRMetadata<RetTy, SrcTy>(R);
  }

  /// doCastIfPossible is used by dyn_cast<>.
  static inline RetTy doCastIfPossible(SrcTy R) {
    if (!isPossible(R))
      return nullptr;
    return doCast(R);
  }
};
/// CastInfo specializations routing cast<>/dyn_cast<> from (const)
/// VPRecipeBase * to (const) VPIRMetadata through CastInfoVPIRMetadata.
template <>
struct CastInfo<VPIRMetadata, VPRecipeBase *>
    : CastInfoVPIRMetadata<VPIRMetadata, VPRecipeBase *> {};
template <>
struct CastInfo<VPIRMetadata, const VPRecipeBase *>
    : CastInfoVPIRMetadata<const VPIRMetadata, const VPRecipeBase *> {};
4169
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
4173class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4174 friend class VPlan;
4175
4176 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4177 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4178 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4179 if (Recipe)
4180 appendRecipe(Recipe);
4181 }
4182
4183public:
4184 using RecipeListTy = iplist<VPRecipeBase>;
4185
4186protected:
4187 /// The VPRecipes held in the order of output instructions to generate.
4188 RecipeListTy Recipes;
4189
4190 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4191 : VPBlockBase(BlockSC, Name.str()) {}
4192
4193public:
4194 ~VPBasicBlock() override {
4195 while (!Recipes.empty())
4196 Recipes.pop_back();
4197 }
4198
4199 /// Instruction iterators...
4200 using iterator = RecipeListTy::iterator;
4201 using const_iterator = RecipeListTy::const_iterator;
4202 using reverse_iterator = RecipeListTy::reverse_iterator;
4203 using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
4204
4205 //===--------------------------------------------------------------------===//
4206 /// Recipe iterator methods
4207 ///
4208 inline iterator begin() { return Recipes.begin(); }
4209 inline const_iterator begin() const { return Recipes.begin(); }
4210 inline iterator end() { return Recipes.end(); }
4211 inline const_iterator end() const { return Recipes.end(); }
4212
4213 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4214 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4215 inline reverse_iterator rend() { return Recipes.rend(); }
4216 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4217
4218 inline size_t size() const { return Recipes.size(); }
4219 inline bool empty() const { return Recipes.empty(); }
4220 inline const VPRecipeBase &front() const { return Recipes.front(); }
4221 inline VPRecipeBase &front() { return Recipes.front(); }
4222 inline const VPRecipeBase &back() const { return Recipes.back(); }
4223 inline VPRecipeBase &back() { return Recipes.back(); }
4224
4225 /// Returns a reference to the list of recipes.
4226 RecipeListTy &getRecipeList() { return Recipes; }
4227
4228 /// Returns a pointer to a member of the recipe list.
4229 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4230 return &VPBasicBlock::Recipes;
4231 }
4232
4233 /// Method to support type inquiry through isa, cast, and dyn_cast.
4234 static inline bool classof(const VPBlockBase *V) {
4235 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4236 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4237 }
4238
4239 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4240 assert(Recipe && "No recipe to append.");
4241 assert(!Recipe->Parent && "Recipe already in VPlan");
4242 Recipe->Parent = this;
4243 Recipes.insert(where: InsertPt, New: Recipe);
4244 }
4245
4246 /// Augment the existing recipes of a VPBasicBlock with an additional
4247 /// \p Recipe as the last recipe.
4248 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, InsertPt: end()); }
4249
4250 /// The method which generates the output IR instructions that correspond to
4251 /// this VPBasicBlock, thereby "executing" the VPlan.
4252 void execute(VPTransformState *State) override;
4253
4254 /// Return the cost of this VPBasicBlock.
4255 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4256
4257 /// Return the position of the first non-phi node recipe in the block.
4258 iterator getFirstNonPhi();
4259
4260 /// Returns an iterator range over the PHI-like recipes in the block.
4261 iterator_range<iterator> phis() {
4262 return make_range(x: begin(), y: getFirstNonPhi());
4263 }
4264
4265 /// Split current block at \p SplitAt by inserting a new block between the
4266 /// current block and its successors and moving all recipes starting at
4267 /// SplitAt to the new block. Returns the new block.
4268 VPBasicBlock *splitAt(iterator SplitAt);
4269
4270 VPRegionBlock *getEnclosingLoopRegion();
4271 const VPRegionBlock *getEnclosingLoopRegion() const;
4272
4273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4274 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
4275 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
4276 ///
4277 /// Note that the numbering is applied to the whole VPlan, so printing
4278 /// individual blocks is consistent with the whole VPlan printing.
4279 void print(raw_ostream &O, const Twine &Indent,
4280 VPSlotTracker &SlotTracker) const override;
4281 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4282#endif
4283
4284 /// If the block has multiple successors, return the branch recipe terminating
4285 /// the block. If there are no or only a single successor, return nullptr;
4286 VPRecipeBase *getTerminator();
4287 const VPRecipeBase *getTerminator() const;
4288
4289 /// Returns true if the block is exiting it's parent region.
4290 bool isExiting() const;
4291
4292 /// Clone the current block and it's recipes, without updating the operands of
4293 /// the cloned recipes.
4294 VPBasicBlock *clone() override;
4295
4296 /// Returns the predecessor block at index \p Idx with the predecessors as per
4297 /// the corresponding plain CFG. If the block is an entry block to a region,
4298 /// the first predecessor is the single predecessor of a region, and the
4299 /// second predecessor is the exiting block of the region.
4300 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4301
4302protected:
4303 /// Execute the recipes in the IR basic block \p BB.
4304 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4305
4306 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4307 /// generated for this VPBB.
4308 void connectToPredecessors(VPTransformState &State);
4309
4310private:
4311 /// Create an IR BasicBlock to hold the output instructions generated by this
4312 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4313 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4314};
4315
4316inline const VPBasicBlock *
4317VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
4318 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4319}
4320
4321/// A special type of VPBasicBlock that wraps an existing IR basic block.
4322/// Recipes of the block get added before the first non-phi instruction in the
4323/// wrapped block.
4324/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4325/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {
  friend class VPlan;

  /// The IR basic block wrapped by this VPIRBasicBlock.
  BasicBlock *IRBB;

  /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
  /// The block's name is derived from the wrapped IR block's name, e.g.
  /// "ir-bb<loop.header>".
  VPIRBasicBlock(BasicBlock *IRBB)
      : VPBasicBlock(VPIRBasicBlockSC,
                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
        IRBB(IRBB) {}

public:
  ~VPIRBasicBlock() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Clone this block; see the definition for how the wrapped IR block is
  /// handled.
  VPIRBasicBlock *clone() override;

  /// Return the IR basic block wrapped by this VPIRBasicBlock.
  BasicBlock *getIRBasicBlock() const { return IRBB; }
};
4352
4353/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4354/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4355/// A VPRegionBlock may indicate that its contents are to be replicated several
4356/// times. This is designed to support predicated scalarization, in which a
4357/// scalar if-then code structure needs to be generated VF * UF times. Having
4358/// this replication indicator helps to keep a single model for multiple
4359/// candidate VF's. The actual replication takes place only once the desired VF
4360/// and UF have been determined.
4361class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4362 friend class VPlan;
4363
4364 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4365 VPBlockBase *Entry;
4366
4367 /// Hold the Single Exiting block of the SESE region modelled by the
4368 /// VPRegionBlock.
4369 VPBlockBase *Exiting;
4370
4371 /// An indicator whether this region is to generate multiple replicated
4372 /// instances of output IR corresponding to its VPBlockBases.
4373 bool IsReplicator;
4374
4375 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4376 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4377 const std::string &Name = "", bool IsReplicator = false)
4378 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4379 IsReplicator(IsReplicator) {
4380 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4381 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4382 Entry->setParent(this);
4383 Exiting->setParent(this);
4384 }
4385 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4386 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4387 IsReplicator(IsReplicator) {}
4388
4389public:
4390 ~VPRegionBlock() override = default;
4391
4392 /// Method to support type inquiry through isa, cast, and dyn_cast.
4393 static inline bool classof(const VPBlockBase *V) {
4394 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4395 }
4396
4397 const VPBlockBase *getEntry() const { return Entry; }
4398 VPBlockBase *getEntry() { return Entry; }
4399
4400 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4401 /// EntryBlock must have no predecessors.
4402 void setEntry(VPBlockBase *EntryBlock) {
4403 assert(EntryBlock->getPredecessors().empty() &&
4404 "Entry block cannot have predecessors.");
4405 Entry = EntryBlock;
4406 EntryBlock->setParent(this);
4407 }
4408
4409 const VPBlockBase *getExiting() const { return Exiting; }
4410 VPBlockBase *getExiting() { return Exiting; }
4411
4412 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4413 /// ExitingBlock must have no successors.
4414 void setExiting(VPBlockBase *ExitingBlock) {
4415 assert(ExitingBlock->getSuccessors().empty() &&
4416 "Exit block cannot have successors.");
4417 Exiting = ExitingBlock;
4418 ExitingBlock->setParent(this);
4419 }
4420
4421 /// Returns the pre-header VPBasicBlock of the loop region.
4422 VPBasicBlock *getPreheaderVPBB() {
4423 assert(!isReplicator() && "should only get pre-header of loop regions");
4424 return getSinglePredecessor()->getExitingBasicBlock();
4425 }
4426
4427 /// An indicator whether this region is to generate multiple replicated
4428 /// instances of output IR corresponding to its VPBlockBases.
4429 bool isReplicator() const { return IsReplicator; }
4430
4431 /// The method which generates the output IR instructions that correspond to
4432 /// this VPRegionBlock, thereby "executing" the VPlan.
4433 void execute(VPTransformState *State) override;
4434
4435 // Return the cost of this region.
4436 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4437
4438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4439 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4440 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4441 /// consequtive numbers.
4442 ///
4443 /// Note that the numbering is applied to the whole VPlan, so printing
4444 /// individual regions is consistent with the whole VPlan printing.
4445 void print(raw_ostream &O, const Twine &Indent,
4446 VPSlotTracker &SlotTracker) const override;
4447 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4448#endif
4449
4450 /// Clone all blocks in the single-entry single-exit region of the block and
4451 /// their recipes without updating the operands of the cloned recipes.
4452 VPRegionBlock *clone() override;
4453
4454 /// Remove the current region from its VPlan, connecting its predecessor to
4455 /// its entry, and its exiting block to its successor.
4456 void dissolveToCFGLoop();
4457
4458 /// Returns the canonical induction recipe of the region.
4459 VPCanonicalIVPHIRecipe *getCanonicalIV() {
4460 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4461 if (EntryVPBB->empty()) {
4462 // VPlan native path. TODO: Unify both code paths.
4463 EntryVPBB = cast<VPBasicBlock>(Val: EntryVPBB->getSingleSuccessor());
4464 }
4465 return cast<VPCanonicalIVPHIRecipe>(Val: &*EntryVPBB->begin());
4466 }
4467 const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
4468 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4469 }
4470
4471 /// Return the type of the canonical IV for loop regions.
4472 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4473 const Type *getCanonicalIVType() const {
4474 return getCanonicalIV()->getScalarType();
4475 }
4476};
4477
/// Return the VPRegionBlock enclosing this recipe, i.e. the parent of the
/// recipe's parent VPBasicBlock.
inline VPRegionBlock *VPRecipeBase::getRegion() {
  return getParent()->getParent();
}

/// Const overload of getRegion(); see above.
inline const VPRegionBlock *VPRecipeBase::getRegion() const {
  return getParent()->getParent();
}
4485
/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
4491class VPlan {
4492 friend class VPlanPrinter;
4493 friend class VPSlotTracker;
4494
4495 /// VPBasicBlock corresponding to the original preheader. Used to place
4496 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4497 /// rest of VPlan execution.
4498 /// When this VPlan is used for the epilogue vector loop, the entry will be
4499 /// replaced by a new entry block created during skeleton creation.
4500 VPBasicBlock *Entry;
4501
4502 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4503 VPIRBasicBlock *ScalarHeader;
4504
4505 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4506 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4507 /// e.g. if the scalar epilogue always executes.
4508 SmallVector<VPIRBasicBlock *, 2> ExitBlocks;
4509
4510 /// Holds the VFs applicable to this VPlan.
4511 SmallSetVector<ElementCount, 2> VFs;
4512
4513 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4514 /// any UF.
4515 SmallSetVector<unsigned, 2> UFs;
4516
4517 /// Holds the name of the VPlan, for printing.
4518 std::string Name;
4519
4520 /// Represents the trip count of the original loop, for folding
4521 /// the tail.
4522 VPValue *TripCount = nullptr;
4523
4524 /// Represents the backedge taken count of the original loop, for folding
4525 /// the tail. It equals TripCount - 1.
4526 VPSymbolicValue *BackedgeTakenCount = nullptr;
4527
4528 /// Represents the vector trip count.
4529 VPSymbolicValue VectorTripCount;
4530
4531 /// Represents the vectorization factor of the loop.
4532 VPSymbolicValue VF;
4533
4534 /// Represents the unroll factor of the loop.
4535 VPSymbolicValue UF;
4536
4537 /// Represents the loop-invariant VF * UF of the vector loop region.
4538 VPSymbolicValue VFxUF;
4539
4540 /// Contains all the external definitions created for this VPlan, as a mapping
4541 /// from IR Values to VPIRValues.
4542 SmallMapVector<Value *, VPIRValue *, 16> LiveIns;
4543
4544 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4545 /// VPlan is destroyed.
4546 SmallVector<VPBlockBase *> CreatedBlocks;
4547
4548 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4549 /// wrapping the original header of the scalar loop.
4550 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4551 : Entry(Entry), ScalarHeader(ScalarHeader) {
4552 Entry->setPlan(this);
4553 assert(ScalarHeader->getNumSuccessors() == 0 &&
4554 "scalar header must be a leaf node");
4555 }
4556
4557public:
4558 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4559 /// original preheader and scalar header of \p L, to be used as entry and
4560 /// scalar header blocks of the new VPlan.
4561 VPlan(Loop *L);
4562
4563 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4564 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4565 VPlan(BasicBlock *ScalarHeaderBB) {
4566 setEntry(createVPBasicBlock(Name: "preheader"));
4567 ScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderBB);
4568 }
4569
4570 LLVM_ABI_FOR_TEST ~VPlan();
4571
4572 void setEntry(VPBasicBlock *VPBB) {
4573 Entry = VPBB;
4574 VPBB->setPlan(this);
4575 }
4576
4577 /// Generate the IR code for this VPlan.
4578 void execute(VPTransformState *State);
4579
4580 /// Return the cost of this plan.
4581 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
4582
4583 VPBasicBlock *getEntry() { return Entry; }
4584 const VPBasicBlock *getEntry() const { return Entry; }
4585
4586 /// Returns the preheader of the vector loop region, if one exists, or null
4587 /// otherwise.
4588 VPBasicBlock *getVectorPreheader() {
4589 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4590 return VectorRegion
4591 ? cast<VPBasicBlock>(Val: VectorRegion->getSinglePredecessor())
4592 : nullptr;
4593 }
4594
4595 /// Returns the VPRegionBlock of the vector loop.
4596 LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
4597 LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;
4598
4599 /// Returns the 'middle' block of the plan, that is the block that selects
4600 /// whether to execute the scalar tail loop or the exit block from the loop
4601 /// latch. If there is an early exit from the vector loop, the middle block
4602 /// conceptully has the early exit block as third successor, split accross 2
4603 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4604 /// tail loop or the exit block. If the scalar tail loop or exit block are
4605 /// known to always execute, the middle block may branch directly to that
4606 /// block. This function cannot be called once the vector loop region has been
4607 /// removed.
4608 VPBasicBlock *getMiddleBlock() {
4609 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4610 assert(
4611 LoopRegion &&
4612 "cannot call the function after vector loop region has been removed");
4613 // The middle block is always the last successor of the region.
4614 return cast<VPBasicBlock>(Val: LoopRegion->getSuccessors().back());
4615 }
4616
4617 const VPBasicBlock *getMiddleBlock() const {
4618 return const_cast<VPlan *>(this)->getMiddleBlock();
4619 }
4620
4621 /// Return the VPBasicBlock for the preheader of the scalar loop.
4622 VPBasicBlock *getScalarPreheader() const {
4623 return cast<VPBasicBlock>(Val: getScalarHeader()->getSinglePredecessor());
4624 }
4625
4626 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4627 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4628
4629 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4630 /// the original scalar loop.
4631 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4632
4633 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4634 /// exit block.
4635 VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const;
4636
4637 /// Returns true if \p VPBB is an exit block.
4638 bool isExitBlock(VPBlockBase *VPBB);
4639
4640 /// The trip count of the original loop.
4641 VPValue *getTripCount() const {
4642 assert(TripCount && "trip count needs to be set before accessing it");
4643 return TripCount;
4644 }
4645
4646 /// Set the trip count assuming it is currently null; if it is not - use
4647 /// resetTripCount().
4648 void setTripCount(VPValue *NewTripCount) {
4649 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4650 TripCount = NewTripCount;
4651 }
4652
4653 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4654 /// the original trip count have been replaced.
4655 void resetTripCount(VPValue *NewTripCount) {
4656 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4657 "TripCount must be set when resetting");
4658 TripCount = NewTripCount;
4659 }
4660
4661 /// The backedge taken count of the original loop.
4662 VPValue *getOrCreateBackedgeTakenCount() {
4663 if (!BackedgeTakenCount)
4664 BackedgeTakenCount = new VPSymbolicValue();
4665 return BackedgeTakenCount;
4666 }
4667 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4668
4669 /// The vector trip count.
4670 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4671
4672 /// Returns the VF of the vector loop region.
4673 VPValue &getVF() { return VF; };
4674 const VPValue &getVF() const { return VF; };
4675
4676 /// Returns the UF of the vector loop region.
4677 VPValue &getUF() { return UF; };
4678
4679 /// Returns VF * UF of the vector loop region.
4680 VPValue &getVFxUF() { return VFxUF; }
4681
4682 LLVMContext &getContext() const {
4683 return getScalarHeader()->getIRBasicBlock()->getContext();
4684 }
4685
4686 void addVF(ElementCount VF) { VFs.insert(X: VF); }
4687
4688 void setVF(ElementCount VF) {
4689 assert(hasVF(VF) && "Cannot set VF not already in plan");
4690 VFs.clear();
4691 VFs.insert(X: VF);
4692 }
4693
4694 /// Remove \p VF from the plan.
4695 void removeVF(ElementCount VF) {
4696 assert(hasVF(VF) && "tried to remove VF not present in plan");
4697 VFs.remove(X: VF);
4698 }
4699
4700 bool hasVF(ElementCount VF) const { return VFs.count(key: VF); }
4701 bool hasScalableVF() const {
4702 return any_of(Range: VFs, P: [](ElementCount VF) { return VF.isScalable(); });
4703 }
4704
4705 /// Returns an iterator range over all VFs of the plan.
4706 iterator_range<SmallSetVector<ElementCount, 2>::iterator>
4707 vectorFactors() const {
4708 return VFs;
4709 }
4710
4711 bool hasScalarVFOnly() const {
4712 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4713 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4714 "Plan with scalar VF should only have a single VF");
4715 return HasScalarVFOnly;
4716 }
4717
4718 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(key: UF); }
4719
4720 /// Returns the concrete UF of the plan, after unrolling.
4721 unsigned getConcreteUF() const {
4722 assert(UFs.size() == 1 && "Expected a single UF");
4723 return UFs[0];
4724 }
4725
4726 void setUF(unsigned UF) {
4727 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4728 UFs.clear();
4729 UFs.insert(X: UF);
4730 }
4731
4732 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4733 /// concrete UF.
4734 bool isUnrolled() const { return UFs.size() == 1; }
4735
4736 /// Return a string with the name of the plan and the applicable VFs and UFs.
4737 std::string getName() const;
4738
4739 void setName(const Twine &newName) { Name = newName.str(); }
4740
4741 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4742 /// yet) for \p V.
4743 VPIRValue *getOrAddLiveIn(Value *V) {
4744 assert(V && "Trying to get or add the VPIRValue of a null Value");
4745 auto [It, Inserted] = LiveIns.try_emplace(Key: V);
4746 if (Inserted) {
4747 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
4748 It->second = new VPConstantInt(CI);
4749 else
4750 It->second = new VPIRValue(V);
4751 }
4752
4753 assert(isa<VPIRValue>(It->second) &&
4754 "Only VPIRValues should be in mapping");
4755 return It->second;
4756 }
4757 VPIRValue *getOrAddLiveIn(VPIRValue *V) {
4758 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4759 return getOrAddLiveIn(V: V->getValue());
4760 }
4761
4762 /// Return a VPIRValue wrapping i1 true.
4763 VPIRValue *getTrue() { return getConstantInt(BitWidth: 1, Val: 1); }
4764
4765 /// Return a VPIRValue wrapping i1 false.
4766 VPIRValue *getFalse() { return getConstantInt(BitWidth: 1, Val: 0); }
4767
4768 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4769 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4770 return getOrAddLiveIn(V: ConstantInt::get(Ty, V: Val, IsSigned));
4771 }
4772
4773 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4774 /// value.
4775 VPIRValue *getConstantInt(unsigned BitWidth, uint64_t Val,
4776 bool IsSigned = false) {
4777 return getConstantInt(Val: APInt(BitWidth, Val, IsSigned));
4778 }
4779
4780 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4781 VPIRValue *getConstantInt(const APInt &Val) {
4782 return getOrAddLiveIn(V: ConstantInt::get(Context&: getContext(), V: Val));
4783 }
4784
4785 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4786 /// otherwise.
4787 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(Key: V); }
4788
4789 /// Return the list of live-in VPValues available in the VPlan.
4790 auto getLiveIns() const { return LiveIns.values(); }
4791
4792#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4793 /// Print the live-ins of this VPlan to \p O.
4794 void printLiveIns(raw_ostream &O) const;
4795
4796 /// Print this VPlan to \p O.
4797 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4798
4799 /// Print this VPlan in DOT format to \p O.
4800 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4801
4802 /// Dump the plan to stderr (for debugging).
4803 LLVM_DUMP_METHOD void dump() const;
4804#endif
4805
4806 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4807 /// recipes to refer to the clones, and return it.
4808 LLVM_ABI_FOR_TEST VPlan *duplicate();
4809
4810 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4811 /// present. The returned block is owned by the VPlan and deleted once the
4812 /// VPlan is destroyed.
4813 VPBasicBlock *createVPBasicBlock(const Twine &Name,
4814 VPRecipeBase *Recipe = nullptr) {
4815 auto *VPB = new VPBasicBlock(Name, Recipe);
4816 CreatedBlocks.push_back(Elt: VPB);
4817 return VPB;
4818 }
4819
4820 /// Create a new loop region with \p Name and entry and exiting blocks set
4821 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4822 /// owned by the VPlan and deleted once the VPlan is destroyed.
4823 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4824 VPBlockBase *Entry = nullptr,
4825 VPBlockBase *Exiting = nullptr) {
4826 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4827 : new VPRegionBlock(Name);
4828 CreatedBlocks.push_back(Elt: VPB);
4829 return VPB;
4830 }
4831
4832 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4833 /// returned block is owned by the VPlan and deleted once the VPlan is
4834 /// destroyed.
4835 VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
4836 const std::string &Name = "") {
4837 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4838 CreatedBlocks.push_back(Elt: VPB);
4839 return VPB;
4840 }
4841
4842 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4843 /// VPIRInstructions wrapping the instructions in t\p IRBB. The returned
4844 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4845 VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);
4846
4847 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4848 /// instructions in \p IRBB, except its terminator which is managed by the
4849 /// successors of the block in VPlan. The returned block is owned by the VPlan
4850 /// and deleted once the VPlan is destroyed.
4851 LLVM_ABI_FOR_TEST VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);
4852
4853 /// Returns true if the VPlan is based on a loop with an early exit. That is
4854 /// the case if the VPlan has either more than one exit block or a single exit
4855 /// block with multiple predecessors (one for the exit via the latch and one
4856 /// via the other early exit).
4857 bool hasEarlyExit() const {
4858 return count_if(Range: ExitBlocks,
4859 P: [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4860 1 ||
4861 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4862 }
4863
4864 /// Returns true if the scalar tail may execute after the vector loop. Note
4865 /// that this relies on unneeded branches to the scalar tail loop being
4866 /// removed.
4867 bool hasScalarTail() const {
4868 return !(!getScalarPreheader()->hasPredecessors() ||
4869 getScalarPreheader()->getSinglePredecessor() == getEntry());
4870 }
4871};
4872
4873#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream insertion operator printing \p Plan in its textual form to \p OS.
inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
4878#endif
4879
4880} // end namespace llvm
4881
4882#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
4883