1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
35#include "llvm/Analysis/IVDescriptors.h"
36#include "llvm/Analysis/MemoryLocation.h"
37#include "llvm/Analysis/VectorUtils.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/Support/InstructionCost.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
54class InnerLoopVectorizer;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
58class RecurrenceDescriptor;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
68class VPReplicateRecipe;
69class Value;
70class LoopVectorizationCostModel;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
79enum class UncountableExitStyle {
80 NoUncountableExit = 0,
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exitblock to determine which exit was taken.
84 ReadOnly,
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurre will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
89 MaskedHandleExitInScalarLoop,
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
94class LLVM_ABI_FOR_TEST VPBlockBase {
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
107 SmallVector<VPBlockBase *, 1> Predecessors;
108
109 /// List of successor blocks.
110 SmallVector<VPBlockBase *, 1> Successors;
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Add \p Successor as the last successor to this block.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor && "Cannot add nullptr successor!");
119 Successors.push_back(Elt: Successor);
120 }
121
122 /// Add \p Predecessor as the last predecessor to this block.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor && "Cannot add nullptr predecessor!");
125 Predecessors.push_back(Elt: Predecessor);
126 }
127
128 /// Remove \p Predecessor from the predecessors of this block.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Range&: Predecessors, Val: Predecessor);
131 assert(Pos && "Predecessor does not exist");
132 Predecessors.erase(CI: Pos);
133 }
134
135 /// Remove \p Successor from the successors of this block.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Range&: Successors, Val: Successor);
138 assert(Pos && "Successor does not exist");
139 Successors.erase(CI: Pos);
140 }
141
142 /// This function replaces one predecessor with another, useful when
143 /// trying to replace an old block in the CFG with a new one.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Range&: Predecessors, Val: Old);
146 assert(I != Predecessors.end());
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// This function replaces one successor with another, useful when
153 /// trying to replace an old block in the CFG with a new one.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Range&: Successors, Val: Old);
156 assert(I != Successors.end());
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
167 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
168 /// that are actually instantiated. Values of this enumeration are kept in the
169 /// SubclassID field of the VPBlockBase objects. They are used for concrete
170 /// type identification.
171 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
172
173 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
219 iterator_range<VPBlockBase **> successors() { return Successors; }
220 iterator_range<VPBlockBase **> predecessors() { return Predecessors; }
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
227 VPBlockBase *getSingleSuccessor() const {
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
233 VPBlockBase *getSinglePredecessor() const {
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
257 const VPBlocksTy &getHierarchicalSuccessors() {
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
263 VPBlockBase *getSingleHierarchicalSuccessor() {
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
273 const VPBlocksTy &getHierarchicalPredecessors() {
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
279 VPBlockBase *getSingleHierarchicalPredecessor() {
280 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
281 }
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
286 void setOneSuccessor(VPBlockBase *Successor) {
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
294 /// successors of this VPBlockBase. This VPBlockBase is not added as
295 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
296 /// successors.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(Successors.empty() && "Setting two successors when others exist.");
299 appendSuccessor(Successor: IfTrue);
300 appendSuccessor(Successor: IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
306 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Predecessor: Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
315 void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Successor: Succ);
319 }
320
321 /// Remove all the predecessor of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
329 void swapPredecessors() {
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(a&: Predecessors[0], b&: Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
337 void swapSuccessors() {
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(a&: Successors[0], b&: Successors[1]);
340 }
341
342 /// Returns the index for \p Pred in the blocks predecessors list.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return std::distance(first: Predecessors.begin(), last: find(Range: Predecessors, Val: Pred));
347 }
348
349 /// Returns the index for \p Succ in the blocks successor list.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return std::distance(first: Successors.begin(), last: find(Range: Successors, Val: Succ));
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
361 virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consequtive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPlan to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and it's recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397/// recipes must inherit from VPSingleDef instead of inheriting from both
398/// VPRecipeBase and VPValue separately.
399class LLVM_ABI_FOR_TEST VPRecipeBase
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
416 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
417 /// that is actually instantiated. Values of this enumeration are kept in the
418 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
419 /// type identification.
420 using VPRecipeTy = enum {
421 VPBranchOnMaskSC,
422 VPDerivedIVSC,
423 VPExpandSCEVSC,
424 VPExpressionSC,
425 VPIRInstructionSC,
426 VPInstructionSC,
427 VPInterleaveEVLSC,
428 VPInterleaveSC,
429 VPReductionEVLSC,
430 VPReductionSC,
431 VPReplicateSC,
432 VPScalarIVStepsSC,
433 VPVectorPointerSC,
434 VPVectorEndPointerSC,
435 VPWidenCallSC,
436 VPWidenCanonicalIVSC,
437 VPWidenCastSC,
438 VPWidenGEPSC,
439 VPWidenIntrinsicSC,
440 VPWidenMemIntrinsicSC,
441 VPWidenLoadEVLSC,
442 VPWidenLoadSC,
443 VPWidenStoreEVLSC,
444 VPWidenStoreSC,
445 VPWidenSC,
446 VPBlendSC,
447 VPHistogramSC,
448 // START: Phi-like recipes. Need to be kept together.
449 VPWidenPHISC,
450 VPPredInstPHISC,
451 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
452 // VPHeaderPHIRecipe need to be kept together.
453 VPCurrentIterationPHISC,
454 VPActiveLaneMaskPHISC,
455 VPFirstOrderRecurrencePHISC,
456 VPWidenIntOrFpInductionSC,
457 VPWidenPointerInductionSC,
458 VPReductionPHISC,
459 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
460 // END: Phi-like recipes
461 VPFirstPHISC = VPWidenPHISC,
462 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
463 VPLastHeaderPHISC = VPReductionPHISC,
464 VPLastPHISC = VPReductionPHISC,
465 };
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
468 DebugLoc DL = DebugLoc::getUnknown())
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
491 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP;
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
520 iplist<VPRecipeBase>::iterator eraseFromParent();
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
551 return mayReadFromMemory() || mayWriteToMemory();
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
566 VPSlotTracker &SlotTracker) const;
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
584// Helper macro to define common classof implementations for recipes.
585#define VP_CLASSOF_IMPL(VPRecipeID) \
586 static inline bool classof(const VPRecipeBase *R) { \
587 return R->getVPRecipeID() == VPRecipeID; \
588 } \
589 static inline bool classof(const VPValue *V) { \
590 auto *R = V->getDefiningRecipe(); \
591 return R && R->getVPRecipeID() == VPRecipeID; \
592 } \
593 static inline bool classof(const VPUser *U) { \
594 auto *R = dyn_cast<VPRecipeBase>(U); \
595 return R && R->getVPRecipeID() == VPRecipeID; \
596 } \
597 static inline bool classof(const VPSingleDefRecipe *R) { \
598 return R->getVPRecipeID() == VPRecipeID; \
599 }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606/// or more output IR that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
608class LLVM_ABI_FOR_TEST VPSingleDefRecipe : public VPRecipeBase,
609 public VPSingleDefValue {
610public:
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
612 DebugLoc DL = DebugLoc::getUnknown())
613 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
614
615 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
616 Value *UV, DebugLoc DL = DebugLoc::getUnknown())
617 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
618
619 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
620 Type *ResultTy, Value *UV = nullptr,
621 DebugLoc DL = DebugLoc::getUnknown())
622 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
623
624 static inline bool classof(const VPRecipeBase *R) {
625 switch (R->getVPRecipeID()) {
626 case VPRecipeBase::VPDerivedIVSC:
627 case VPRecipeBase::VPExpandSCEVSC:
628 case VPRecipeBase::VPExpressionSC:
629 case VPRecipeBase::VPInstructionSC:
630 case VPRecipeBase::VPReductionEVLSC:
631 case VPRecipeBase::VPReductionSC:
632 case VPRecipeBase::VPReplicateSC:
633 case VPRecipeBase::VPScalarIVStepsSC:
634 case VPRecipeBase::VPVectorPointerSC:
635 case VPRecipeBase::VPVectorEndPointerSC:
636 case VPRecipeBase::VPWidenCallSC:
637 case VPRecipeBase::VPWidenCanonicalIVSC:
638 case VPRecipeBase::VPWidenCastSC:
639 case VPRecipeBase::VPWidenGEPSC:
640 case VPRecipeBase::VPWidenIntrinsicSC:
641 case VPRecipeBase::VPWidenMemIntrinsicSC:
642 case VPRecipeBase::VPWidenSC:
643 case VPRecipeBase::VPBlendSC:
644 case VPRecipeBase::VPPredInstPHISC:
645 case VPRecipeBase::VPCurrentIterationPHISC:
646 case VPRecipeBase::VPActiveLaneMaskPHISC:
647 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
648 case VPRecipeBase::VPWidenPHISC:
649 case VPRecipeBase::VPWidenIntOrFpInductionSC:
650 case VPRecipeBase::VPWidenPointerInductionSC:
651 case VPRecipeBase::VPReductionPHISC:
652 case VPRecipeBase::VPWidenLoadEVLSC:
653 case VPRecipeBase::VPWidenLoadSC:
654 return true;
655 case VPRecipeBase::VPBranchOnMaskSC:
656 case VPRecipeBase::VPInterleaveEVLSC:
657 case VPRecipeBase::VPInterleaveSC:
658 case VPRecipeBase::VPIRInstructionSC:
659 case VPRecipeBase::VPWidenStoreEVLSC:
660 case VPRecipeBase::VPWidenStoreSC:
661 case VPRecipeBase::VPHistogramSC:
662 return false;
663 }
664 llvm_unreachable("Unhandled VPRecipeID");
665 }
666
667 static inline bool classof(const VPValue *V) {
668 auto *R = V->getDefiningRecipe();
669 return R && classof(R);
670 }
671
672 static inline bool classof(const VPUser *U) {
673 auto *R = dyn_cast<VPRecipeBase>(Val: U);
674 return R && classof(R);
675 }
676
677 VPSingleDefRecipe *clone() override = 0;
678
679 /// Returns the underlying instruction.
680 Instruction *getUnderlyingInstr() {
681 return cast<Instruction>(Val: getUnderlyingValue());
682 }
683 const Instruction *getUnderlyingInstr() const {
684 return cast<Instruction>(Val: getUnderlyingValue());
685 }
686
687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
689 LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const;
690#endif
691};
692
693/// Class to record and manage LLVM IR flags.
694LLVM_PACKED_START
695class VPIRFlags {
696 enum class OperationType : unsigned char {
697 Cmp,
698 FCmp,
699 OverflowingBinOp,
700 Trunc,
701 DisjointOp,
702 PossiblyExactOp,
703 GEPOp,
704 FPMathOp,
705 NonNegOp,
706 ReductionOp,
707 Other
708 };
709
710public:
711 struct WrapFlagsTy {
712 char HasNUW : 1;
713 char HasNSW : 1;
714
715 WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
716 };
717
718 struct TruncFlagsTy {
719 char HasNUW : 1;
720 char HasNSW : 1;
721
722 TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
723 };
724
725 struct DisjointFlagsTy {
726 char IsDisjoint : 1;
727 DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
728 };
729
730 struct NonNegFlagsTy {
731 char NonNeg : 1;
732 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
733 };
734
735private:
736 struct ExactFlagsTy {
737 char IsExact : 1;
738 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
739 };
740 struct FastMathFlagsTy {
741 char AllowReassoc : 1;
742 char NoNaNs : 1;
743 char NoInfs : 1;
744 char NoSignedZeros : 1;
745 char AllowReciprocal : 1;
746 char AllowContract : 1;
747 char ApproxFunc : 1;
748
749 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
750 };
751 /// Holds both the predicate and fast-math flags for floating-point
752 /// comparisons.
753 struct FCmpFlagsTy {
754 uint8_t CmpPredStorage;
755 FastMathFlagsTy FMFs;
756 };
757 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
758 struct ReductionFlagsTy {
759 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
760 // additional kinds.
761 unsigned char Kind : 6;
762 // TODO: Derive order/in-loop from plan and remove here.
763 unsigned char IsOrdered : 1;
764 unsigned char IsInLoop : 1;
765 FastMathFlagsTy FMFs;
766
767 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
768 FastMathFlags FMFs)
769 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
770 IsInLoop(IsInLoop), FMFs(FMFs) {}
771 };
772
773 OperationType OpType;
774
775 union {
776 uint8_t CmpPredStorage;
777 WrapFlagsTy WrapFlags;
778 TruncFlagsTy TruncFlags;
779 DisjointFlagsTy DisjointFlags;
780 ExactFlagsTy ExactFlags;
781 uint8_t GEPFlagsStorage;
782 NonNegFlagsTy NonNegFlags;
783 FastMathFlagsTy FMFs;
784 FCmpFlagsTy FCmpFlags;
785 ReductionFlagsTy ReductionFlags;
786 uint8_t AllFlags[2];
787 };
788
789public:
790 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
791
792 VPIRFlags(Instruction &I) : VPIRFlags() {
793 if (auto *FCmp = dyn_cast<FCmpInst>(Val: &I)) {
794 OpType = OperationType::FCmp;
795 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage,
796 Value: FCmp->getPredicate());
797 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
798 FCmpFlags.FMFs = FCmp->getFastMathFlags();
799 } else if (auto *Op = dyn_cast<CmpInst>(Val: &I)) {
800 OpType = OperationType::Cmp;
801 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage,
802 Value: Op->getPredicate());
803 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
804 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(Val: &I)) {
805 OpType = OperationType::DisjointOp;
806 DisjointFlags.IsDisjoint = Op->isDisjoint();
807 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(Val: &I)) {
808 OpType = OperationType::OverflowingBinOp;
809 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
810 } else if (auto *Op = dyn_cast<TruncInst>(Val: &I)) {
811 OpType = OperationType::Trunc;
812 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
813 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(Val: &I)) {
814 OpType = OperationType::PossiblyExactOp;
815 ExactFlags.IsExact = Op->isExact();
816 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
817 OpType = OperationType::GEPOp;
818 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
819 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
820 "wrap flags truncated");
821 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) {
822 OpType = OperationType::NonNegOp;
823 NonNegFlags.NonNeg = PNNI->hasNonNeg();
824 } else if (auto *Op = dyn_cast<FPMathOperator>(Val: &I)) {
825 OpType = OperationType::FPMathOp;
826 FMFs = Op->getFastMathFlags();
827 }
828 }
829
830 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
831 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage, Value: Pred);
832 assert(getPredicate() == Pred && "predicate truncated");
833 }
834
835 VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
836 : OpType(OperationType::FCmp), AllFlags() {
837 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage, Value: Pred);
838 assert(getPredicate() == Pred && "predicate truncated");
839 FCmpFlags.FMFs = FMFs;
840 }
841
842 VPIRFlags(WrapFlagsTy WrapFlags)
843 : OpType(OperationType::OverflowingBinOp), AllFlags() {
844 this->WrapFlags = WrapFlags;
845 }
846
847 VPIRFlags(TruncFlagsTy TruncFlags)
848 : OpType(OperationType::Trunc), AllFlags() {
849 this->TruncFlags = TruncFlags;
850 }
851
852 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
853 this->FMFs = FMFs;
854 }
855
856 VPIRFlags(DisjointFlagsTy DisjointFlags)
857 : OpType(OperationType::DisjointOp), AllFlags() {
858 this->DisjointFlags = DisjointFlags;
859 }
860
861 VPIRFlags(NonNegFlagsTy NonNegFlags)
862 : OpType(OperationType::NonNegOp), AllFlags() {
863 this->NonNegFlags = NonNegFlags;
864 }
865
866 VPIRFlags(ExactFlagsTy ExactFlags)
867 : OpType(OperationType::PossiblyExactOp), AllFlags() {
868 this->ExactFlags = ExactFlags;
869 }
870
871 VPIRFlags(GEPNoWrapFlags GEPFlags)
872 : OpType(OperationType::GEPOp), AllFlags() {
873 GEPFlagsStorage = GEPFlags.getRaw();
874 }
875
876 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
877 : OpType(OperationType::ReductionOp), AllFlags() {
878 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
879 }
880
881 void transferFlags(VPIRFlags &Other) {
882 OpType = Other.OpType;
883 AllFlags[0] = Other.AllFlags[0];
884 AllFlags[1] = Other.AllFlags[1];
885 }
886
887 /// Only keep flags also present in \p Other. \p Other must have the same
888 /// OpType as the current object.
889 void intersectFlags(const VPIRFlags &Other);
890
891 /// Drop all poison-generating flags.
892 void dropPoisonGeneratingFlags() {
893 // NOTE: This needs to be kept in-sync with
894 // Instruction::dropPoisonGeneratingFlags.
895 switch (OpType) {
896 case OperationType::OverflowingBinOp:
897 WrapFlags.HasNUW = false;
898 WrapFlags.HasNSW = false;
899 break;
900 case OperationType::Trunc:
901 TruncFlags.HasNUW = false;
902 TruncFlags.HasNSW = false;
903 break;
904 case OperationType::DisjointOp:
905 DisjointFlags.IsDisjoint = false;
906 break;
907 case OperationType::PossiblyExactOp:
908 ExactFlags.IsExact = false;
909 break;
910 case OperationType::GEPOp:
911 GEPFlagsStorage = 0;
912 break;
913 case OperationType::FPMathOp:
914 case OperationType::FCmp:
915 case OperationType::ReductionOp:
916 getFMFsRef().NoNaNs = false;
917 getFMFsRef().NoInfs = false;
918 break;
919 case OperationType::NonNegOp:
920 NonNegFlags.NonNeg = false;
921 break;
922 case OperationType::Cmp:
923 case OperationType::Other:
924 break;
925 }
926 }
927
928 /// Apply the IR flags to \p I.
929 void applyFlags(Instruction &I) const {
930 switch (OpType) {
931 case OperationType::OverflowingBinOp:
932 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
933 I.setHasNoSignedWrap(WrapFlags.HasNSW);
934 break;
935 case OperationType::Trunc:
936 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
937 I.setHasNoSignedWrap(TruncFlags.HasNSW);
938 break;
939 case OperationType::DisjointOp:
940 cast<PossiblyDisjointInst>(Val: &I)->setIsDisjoint(DisjointFlags.IsDisjoint);
941 break;
942 case OperationType::PossiblyExactOp:
943 I.setIsExact(ExactFlags.IsExact);
944 break;
945 case OperationType::GEPOp:
946 cast<GetElementPtrInst>(Val: &I)->setNoWrapFlags(
947 GEPNoWrapFlags::fromRaw(Flags: GEPFlagsStorage));
948 break;
949 case OperationType::FPMathOp:
950 case OperationType::FCmp: {
951 const FastMathFlagsTy &F = getFMFsRef();
952 I.setHasAllowReassoc(F.AllowReassoc);
953 I.setHasNoNaNs(F.NoNaNs);
954 I.setHasNoInfs(F.NoInfs);
955 I.setHasNoSignedZeros(F.NoSignedZeros);
956 I.setHasAllowReciprocal(F.AllowReciprocal);
957 I.setHasAllowContract(F.AllowContract);
958 I.setHasApproxFunc(F.ApproxFunc);
959 break;
960 }
961 case OperationType::NonNegOp:
962 I.setNonNeg(NonNegFlags.NonNeg);
963 break;
964 case OperationType::ReductionOp:
965 llvm_unreachable("reduction ops should not use applyFlags");
966 case OperationType::Cmp:
967 case OperationType::Other:
968 break;
969 }
970 }
971
972 CmpInst::Predicate getPredicate() const {
973 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
974 "recipe doesn't have a compare predicate");
975 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
976 : CmpPredStorage;
977 return Bitfield::get<CmpInst::PredicateField>(Packed: Storage);
978 }
979
980 void setPredicate(CmpInst::Predicate Pred) {
981 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
982 "recipe doesn't have a compare predicate");
983 if (OpType == OperationType::FCmp)
984 Bitfield::set<CmpInst::PredicateField>(Packed&: FCmpFlags.CmpPredStorage, Value: Pred);
985 else
986 Bitfield::set<CmpInst::PredicateField>(Packed&: CmpPredStorage, Value: Pred);
987 assert(getPredicate() == Pred && "predicate truncated");
988 }
989
990 GEPNoWrapFlags getGEPNoWrapFlags() const {
991 return GEPNoWrapFlags::fromRaw(Flags: GEPFlagsStorage);
992 }
993
994 /// Returns true if the recipe has a comparison predicate.
995 bool hasPredicate() const {
996 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
997 }
998
999 /// Returns true if the recipe has fast-math flags.
1000 bool hasFastMathFlags() const {
1001 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1002 OpType == OperationType::ReductionOp;
1003 }
1004
1005 LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const;
1006
1007 /// Returns true if the recipe has non-negative flag.
1008 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1009
1010 bool isNonNeg() const {
1011 assert(OpType == OperationType::NonNegOp &&
1012 "recipe doesn't have a NNEG flag");
1013 return NonNegFlags.NonNeg;
1014 }
1015
1016 bool hasNoUnsignedWrap() const {
1017 switch (OpType) {
1018 case OperationType::OverflowingBinOp:
1019 return WrapFlags.HasNUW;
1020 case OperationType::Trunc:
1021 return TruncFlags.HasNUW;
1022 default:
1023 llvm_unreachable("recipe doesn't have a NUW flag");
1024 }
1025 }
1026
1027 bool hasNoSignedWrap() const {
1028 switch (OpType) {
1029 case OperationType::OverflowingBinOp:
1030 return WrapFlags.HasNSW;
1031 case OperationType::Trunc:
1032 return TruncFlags.HasNSW;
1033 default:
1034 llvm_unreachable("recipe doesn't have a NSW flag");
1035 }
1036 }
1037
1038 bool hasNoWrapFlags() const {
1039 switch (OpType) {
1040 case OperationType::OverflowingBinOp:
1041 case OperationType::Trunc:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1048 WrapFlagsTy getNoWrapFlags() const {
1049 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1050 }
1051
1052 bool isDisjoint() const {
1053 assert(OpType == OperationType::DisjointOp &&
1054 "recipe cannot have a disjoing flag");
1055 return DisjointFlags.IsDisjoint;
1056 }
1057
1058 RecurKind getRecurKind() const {
1059 assert(OpType == OperationType::ReductionOp &&
1060 "recipe doesn't have reduction flags");
1061 return static_cast<RecurKind>(ReductionFlags.Kind);
1062 }
1063
1064 bool isReductionOrdered() const {
1065 assert(OpType == OperationType::ReductionOp &&
1066 "recipe doesn't have reduction flags");
1067 return ReductionFlags.IsOrdered;
1068 }
1069
1070 bool isReductionInLoop() const {
1071 assert(OpType == OperationType::ReductionOp &&
1072 "recipe doesn't have reduction flags");
1073 return ReductionFlags.IsInLoop;
1074 }
1075
1076private:
1077 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1078 FastMathFlagsTy &getFMFsRef() {
1079 if (OpType == OperationType::FCmp)
1080 return FCmpFlags.FMFs;
1081 if (OpType == OperationType::ReductionOp)
1082 return ReductionFlags.FMFs;
1083 return FMFs;
1084 }
1085 const FastMathFlagsTy &getFMFsRef() const {
1086 if (OpType == OperationType::FCmp)
1087 return FCmpFlags.FMFs;
1088 if (OpType == OperationType::ReductionOp)
1089 return ReductionFlags.FMFs;
1090 return FMFs;
1091 }
1092
1093public:
1094 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1095 /// otherwise. Opcodes not supporting default flags include compares and
1096 /// ComputeReductionResult.
1097 static VPIRFlags getDefaultFlags(unsigned Opcode);
1098
1099#if !defined(NDEBUG)
1100 /// Returns true if the set flags are valid for \p Opcode.
1101 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1102
1103 /// Returns true if \p Opcode has its required flags set.
1104 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1105#endif
1106
1107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1108 void printFlags(raw_ostream &O) const;
1109#endif
1110};
1111LLVM_PACKED_END
1112
1113static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1114
1115/// A pure-virtual common base class for recipes defining a single VPValue and
1116/// using IR flags.
1117struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
1118 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1119 const VPIRFlags &Flags,
1120 DebugLoc DL = DebugLoc::getUnknown())
1121 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1122
1123 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1124 Type *ResultTy, const VPIRFlags &Flags,
1125 DebugLoc DL = DebugLoc::getUnknown())
1126 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1127 VPIRFlags(Flags) {}
1128
1129 static inline bool classof(const VPRecipeBase *R) {
1130 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1143 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1144 }
1145
1146 static inline bool classof(const VPUser *U) {
1147 auto *R = dyn_cast<VPRecipeBase>(Val: U);
1148 return R && classof(R);
1149 }
1150
1151 static inline bool classof(const VPValue *V) {
1152 auto *R = V->getDefiningRecipe();
1153 return R && classof(R);
1154 }
1155
1156 VPRecipeWithIRFlags *clone() override = 0;
1157
1158 static inline bool classof(const VPSingleDefRecipe *R) {
1159 return classof(R: static_cast<const VPRecipeBase *>(R));
1160 }
1161
1162 void execute(VPTransformState &State) override = 0;
1163
1164 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1165 InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
1166 VPCostContext &Ctx) const;
1167};
1168
1169/// Helper to manage IR metadata for recipes. It filters out metadata that
1170/// cannot be propagated.
1171class LLVM_ABI_FOR_TEST VPIRMetadata {
1172 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
1173
1174public:
1175 VPIRMetadata() = default;
1176
1177 /// Adds metatadata that can be preserved from the original instruction
1178 /// \p I.
1179 VPIRMetadata(Instruction &I) { getMetadataToPropagate(Inst: &I, Metadata); }
1180
1181 /// Copy constructor for cloning.
1182 VPIRMetadata(const VPIRMetadata &Other) = default;
1183
1184 VPIRMetadata &operator=(const VPIRMetadata &Other) = default;
1185
1186 /// Add all metadata to \p I.
1187 void applyMetadata(Instruction &I) const;
1188
1189 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1190 /// already exists, it will be replaced. Otherwise, it will be added.
1191 void setMetadata(unsigned Kind, MDNode *Node) {
1192 auto It =
1193 llvm::find_if(Range&: Metadata, P: [Kind](const std::pair<unsigned, MDNode *> &P) {
1194 return P.first == Kind;
1195 });
1196 if (It != Metadata.end())
1197 It->second = Node;
1198 else
1199 Metadata.emplace_back(Args&: Kind, Args&: Node);
1200 }
1201
1202 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1203 /// nodes that are common to both.
1204 void intersect(const VPIRMetadata &MD);
1205
1206 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1207 MDNode *getMetadata(unsigned Kind) const {
1208 auto It =
1209 find_if(Range: Metadata, P: [Kind](const auto &P) { return P.first == Kind; });
1210 return It != Metadata.end() ? It->second : nullptr;
1211 }
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214 /// Print metadata with node IDs.
1215 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1216#endif
1217};
1218
1219/// This is a concrete Recipe that models a single VPlan-level instruction.
1220/// While as any Recipe it may generate a sequence of IR instructions when
1221/// executed, these instructions would always form a single-def expression as
1222/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1223/// opcodes can take an optional mask. Masks may be assigned during
1224/// predication.
1225class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
1226 public VPIRMetadata {
1227public:
1228 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
1229 enum {
1230 FirstOrderRecurrenceSplice =
1231 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1232 // values of a first-order recurrence.
1233 Not,
1234 // Creates a mask where each lane is active (true) whilst the current
1235 // counter (first operand + index) is less than the second operand. i.e.
1236 // mask[i] = icmpt ult (op0 + i), op1
1237 // The size of the mask returned is VF * Multiplier (UF, third op).
1238 ActiveLaneMask,
1239 ExplicitVectorLength,
1240 // Represents the incoming loop-invariant alias-mask. All memory accesses
1241 // in the loop must stay within the active lanes.
1242 IncomingAliasMask,
1243 CalculateTripCountMinusVF,
1244 // Increment the canonical IV separately for each unrolled part.
1245 CanonicalIVIncrementForPart,
1246 // Abstract instruction that compares two values and branches. This is
1247 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1248 BranchOnCount,
1249 BranchOnCond,
1250 // Branch with 2 boolean condition operands and 3 successors. If condition
1251 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1252 // successor 1; otherwise branches to successor 2. Expanded after region
1253 // dissolution into: (1) an OR of the two conditions branching to
1254 // middle.split or successor 2, and (2) middle.split branching to successor
1255 // 0 or successor 1 based on condition 0.
1256 BranchOnTwoConds,
1257 Broadcast,
1258 /// Given operands of (the same) struct type, creates a struct of fixed-
1259 /// width vectors each containing a struct field of all operands. The
1260 /// number of operands matches the element count of every vector.
1261 BuildStructVector,
1262 /// Creates a fixed-width vector containing all operands. The number of
1263 /// operands matches the vector element count.
1264 BuildVector,
1265 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1266 /// abstract VPInstruction whose single defined VPValue represents VF
1267 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1268 /// VPInstructions.
1269 Unpack,
1270 /// Reduce the operands to the final reduction result using the operation
1271 /// specified via the operation's VPIRFlags.
1272 ComputeReductionResult,
1273 // Extracts the last part of its operand. Removed during unrolling.
1274 ExtractLastPart,
1275 // Extracts the last lane of its vector operand, per part.
1276 ExtractLastLane,
1277 // Extracts the second-to-last lane from its operand or the second-to-last
1278 // part if it is scalar. In the latter case, the recipe will be removed
1279 // during unrolling.
1280 ExtractPenultimateElement,
1281 LogicalAnd, // Non-poison propagating logical And.
1282 LogicalOr, // Non-poison propagating logical Or.
1283 NumActiveLanes, // Counts the number of active lanes in a mask.
1284 // Add an offset in bytes (second operand) to a base pointer (first
1285 // operand). Only generates scalar values (either for the first lane only or
1286 // for all lanes, depending on its uses).
1287 PtrAdd,
1288 // Add a vector offset in bytes (second operand) to a scalar base pointer
1289 // (first operand).
1290 WidePtrAdd,
1291 // Returns a scalar boolean value, which is true if any lane of its
1292 // (boolean) vector operands is true. It produces the reduced value across
1293 // all unrolled iterations. Unrolling will add all copies of its original
1294 // operand as additional operands. AnyOf is poison-safe as all operands
1295 // will be frozen.
1296 AnyOf,
1297 // Calculates the first active lane index of the vector predicate operands.
1298 // It produces the lane index across all unrolled iterations. Unrolling will
1299 // add all copies of its original operand as additional operands.
1300 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1301 // result even with operands that are all zeroes.
1302 FirstActiveLane,
1303 // Calculates the last active lane index of the vector predicate operands.
1304 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1305 // tail-folding to extract the correct live-out value from the last active
1306 // iteration. It produces the lane index across all unrolled iterations.
1307 // Unrolling will add all copies of its original operand as additional
1308 // operands.
1309 LastActiveLane,
1310 // Returns a reversed vector for the operand.
1311 Reverse,
1312 /// Start vector for reductions with 3 operands: the original start value,
1313 /// the identity value for the reduction and an integer indicating the
1314 /// scaling factor.
1315 ReductionStartVector,
1316 /// Extracts a single lane (first operand) from a set of vector operands.
1317 /// The lane specifies an index into a vector formed by combining all vector
1318 /// operands (all operands after the first one).
1319 ExtractLane,
1320 /// Explicit user for the resume phi of the canonical induction in the main
1321 /// VPlan, used by the epilogue vector loop.
1322 ResumeForEpilogue,
1323 /// Extracts the last active lane from a set of vectors. The first operand
1324 /// is the default value if no lanes in the masks are active. Conceptually,
1325 /// this concatenates all data vectors (odd operands), concatenates all
1326 /// masks (even operands -- ignoring the default value), and returns the
1327 /// last active value from the combined data vector using the combined mask.
1328 ExtractLastActive,
1329 /// Compute the exiting value of a wide induction after vectorization, that
1330 /// is the value of the last lane of the induction increment (i.e. its
1331 /// backedge value). Has the wide induction recipe as operand.
1332 ExitingIVValue,
1333 MaskedCond,
1334
1335 // The opcodes below are used for VPInstructionWithType.
1336 // NOTE: VPInstructionWithType classes are also used for:
1337 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1338 // cases where createScalarCast, createScalarZExtOrTrunc and
1339 // createScalarSExtOrTrunc are invoked.
1340 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1341
1342 /// Scale the first operand (vector step) by the second operand
1343 /// (scalar-step). Casts both operands to the result type if needed.
1344 WideIVStep,
1345 // Creates a step vector starting from 0 to VF with a step of 1.
1346 StepVector,
1347 /// Returns the value for vscale.
1348 VScale,
1349
1350 OpsEnd = VScale,
1351 };
1352
1353 /// Returns true if this VPInstruction generates scalar values for all lanes.
1354 /// Most VPInstructions generate a single value per part, either vector or
1355 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1356 /// values per all lanes, stemming from an original ingredient. This method
1357 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1358 /// underlying ingredient.
1359 bool doesGeneratePerAllLanes() const;
1360
1361 /// Return the number of operands determined by the opcode of the
1362 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1363 /// cannot be determined directly by the opcode.
1364 unsigned getNumOperandsForOpcode() const;
1365
1366private:
1367 typedef unsigned char OpcodeTy;
1368 OpcodeTy Opcode;
1369
1370 /// An optional name that can be used for the generated IR instruction.
1371 std::string Name;
1372
1373 /// Returns true if we can generate a scalar for the first lane only if
1374 /// needed.
1375 bool canGenerateScalarForFirstLane() const;
1376
1377 /// Utility methods serving execute(): generates a single vector instance of
1378 /// the modeled instruction. \returns the generated value. . In some cases an
1379 /// existing value is returned rather than a generated one.
1380 Value *generate(VPTransformState &State);
1381
1382 /// Returns true if the VPInstruction does not need masking.
1383 bool alwaysUnmasked() const {
1384 if (Opcode == VPInstruction::MaskedCond)
1385 return false;
1386
1387 // For now only VPInstructions with underlying values use masks.
1388 // TODO: provide masks to VPInstructions w/o underlying values.
1389 if (!getUnderlyingValue())
1390 return true;
1391
1392 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1393 Opcode == Instruction::GetElementPtr;
1394 }
1395
1396public:
1397 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1398 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1399 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1400 Type *ResultTy = nullptr);
1401
1402 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1403
1404 VPInstruction *clone() override {
1405 return cloneWithOperands(NewOperands: operands(), ResultTy: getScalarType());
1406 }
1407
1408 VPInstruction *cloneWithOperands(ArrayRef<VPValue *> NewOperands,
1409 Type *ResultTy = nullptr) {
1410 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1411 getDebugLoc(), Name, ResultTy);
1412 if (getUnderlyingValue())
1413 New->setUnderlyingValue(getUnderlyingInstr());
1414 return New;
1415 }
1416
1417 unsigned getOpcode() const { return Opcode; }
1418
1419 /// Add \p Op as operand of this VPInstruction. Only supported for AnyOf,
1420 /// ComputeReductionResult, BuildVector, BuildStructVector, ExtractLane,
1421 /// ExtractLastActive, FirstActiveLane, LastActiveLane.
1422 void addOperand(VPValue *Op);
1423
1424 /// Generate the instruction.
1425 /// TODO: We currently execute only per-part unless a specific instance is
1426 /// provided.
1427 void execute(VPTransformState &State) override;
1428
1429 /// Return the cost of this VPInstruction.
1430 InstructionCost computeCost(ElementCount VF,
1431 VPCostContext &Ctx) const override;
1432
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 /// Print the VPInstruction to dbgs() (for debugging).
1435 LLVM_DUMP_METHOD void dump() const;
1436#endif
1437
1438 bool hasResult() const {
1439 // CallInst may or may not have a result, depending on the called function.
1440 // Conservatively return calls have results for now.
1441 switch (getOpcode()) {
1442 case Instruction::Ret:
1443 case Instruction::UncondBr:
1444 case Instruction::CondBr:
1445 case Instruction::Store:
1446 case Instruction::Switch:
1447 case Instruction::IndirectBr:
1448 case Instruction::Resume:
1449 case Instruction::CatchRet:
1450 case Instruction::Unreachable:
1451 case Instruction::Fence:
1452 case Instruction::AtomicRMW:
1453 case VPInstruction::BranchOnCond:
1454 case VPInstruction::BranchOnTwoConds:
1455 case VPInstruction::BranchOnCount:
1456 return false;
1457 default:
1458 return true;
1459 }
1460 }
1461
1462 /// Returns true if the VPInstruction has a mask operand.
1463 bool isMasked() const {
1464 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1465 // VPInstructions without a fixed number of operands cannot be masked.
1466 if (NumOpsForOpcode == -1u)
1467 return false;
1468 return NumOpsForOpcode + 1 == getNumOperands();
1469 }
1470
1471 /// Returns the number of operands, excluding the mask if the VPInstruction is
1472 /// masked.
1473 unsigned getNumOperandsWithoutMask() const {
1474 return getNumOperands() - isMasked();
1475 }
1476
1477 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1478 void addMask(VPValue *Mask) {
1479 assert(!isMasked() && "recipe is already masked");
1480 if (alwaysUnmasked())
1481 return;
1482 assert(Mask->getScalarType()->isIntegerTy(1) &&
1483 "Mask must be an i1 (vector)");
1484 VPUser::addOperand(Operand: Mask);
1485 }
1486
1487 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1488 /// VPInstructions.
1489 VPValue *getMask() const {
1490 return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr;
1491 }
1492
1493 /// Returns an iterator range over the operands excluding the mask operand
1494 /// if present.
1495 iterator_range<operand_iterator> operandsWithoutMask() {
1496 return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
1497 }
1498 iterator_range<const_operand_iterator> operandsWithoutMask() const {
1499 return make_range(x: op_begin(), y: op_begin() + getNumOperandsWithoutMask());
1500 }
1501
1502 /// Returns true if the underlying opcode may read from or write to memory.
1503 bool opcodeMayReadOrWriteFromMemory() const;
1504
1505 /// Returns true if the recipe only uses the first lane of operand \p Op.
1506 bool usesFirstLaneOnly(const VPValue *Op) const override;
1507
1508 /// Returns true if the recipe only uses the first part of operand \p Op.
1509 bool usesFirstPartOnly(const VPValue *Op) const override;
1510
1511 /// Returns true if this VPInstruction produces a scalar value from a vector,
1512 /// e.g. by performing a reduction or extracting a lane.
1513 bool isVectorToScalar() const;
1514
1515 /// Returns true if this VPInstruction's operands are single scalars and the
1516 /// result is also a single scalar.
1517 bool isSingleScalar() const;
1518
1519 /// Returns the symbolic name assigned to the VPInstruction.
1520 StringRef getName() const { return Name; }
1521
1522 /// Set the symbolic name for the VPInstruction.
1523 void setName(StringRef NewName) { Name = NewName.str(); }
1524
1525protected:
1526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1527 /// Print the VPInstruction to \p O.
1528 void printRecipe(raw_ostream &O, const Twine &Indent,
1529 VPSlotTracker &SlotTracker) const override;
1530#endif
1531};
1532
1533/// A specialization of VPInstruction augmenting it with a dedicated result
1534/// type, to be used when the opcode and operands of the VPInstruction don't
1535/// directly determine the result type. Note that there is no separate recipe ID
1536/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1537/// distinguished purely by the opcode.
1538/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1539class VPInstructionWithType : public VPInstruction {
1540public:
1541 VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
1542 Type *ResultTy, const VPIRFlags &Flags = {},
1543 const VPIRMetadata &Metadata = {},
1544 DebugLoc DL = DebugLoc::getUnknown(),
1545 const Twine &Name = "", Value *UV = nullptr)
1546 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {
1547 setUnderlyingValue(UV);
1548 }
1549
1550 static inline bool classof(const VPRecipeBase *R) {
1551 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1552 // type information.
1553 auto *VPI = dyn_cast<VPInstruction>(Val: R);
1554 if (!VPI)
1555 return false;
1556 unsigned Opc = VPI->getOpcode();
1557 if (Instruction::isCast(Opcode: Opc))
1558 return true;
1559 switch (Opc) {
1560 case VPInstruction::WideIVStep:
1561 case VPInstruction::StepVector:
1562 case VPInstruction::VScale:
1563 case Instruction::Load:
1564 return true;
1565 default:
1566 return false;
1567 }
1568 }
1569
1570 static inline bool classof(const VPUser *R) {
1571 return isa<VPInstructionWithType>(Val: cast<VPRecipeBase>(Val: R));
1572 }
1573
1574 VPInstruction *clone() override {
1575 auto *New =
1576 new VPInstructionWithType(getOpcode(), operands(), getResultType(),
1577 *this, *this, getDebugLoc(), getName());
1578 New->setUnderlyingValue(getUnderlyingValue());
1579 return New;
1580 }
1581
1582 void execute(VPTransformState &State) override;
1583
1584 /// Return the cost of this VPInstruction.
1585 InstructionCost computeCost(ElementCount VF,
1586 VPCostContext &Ctx) const override;
1587
1588 Type *getResultType() const { return getScalarType(); }
1589
1590 /// Cast recipes always use scalars of their operand.
1591 bool usesScalars(const VPValue *Op) const override {
1592 if (Instruction::isCast(Opcode: getOpcode()))
1593 return true;
1594 return VPInstruction::usesScalars(Op);
1595 }
1596
1597protected:
1598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1599 /// Print the recipe.
1600 void printRecipe(raw_ostream &O, const Twine &Indent,
1601 VPSlotTracker &SlotTracker) const override;
1602#endif
1603};
1604
1605/// Helper type to provide functions to access incoming values and blocks for
1606/// phi-like recipes.
1607class VPPhiAccessors {
1608protected:
1609 /// Return a VPRecipeBase* to the current object.
1610 virtual const VPRecipeBase *getAsRecipe() const = 0;
1611
1612public:
1613 virtual ~VPPhiAccessors() = default;
1614
1615 /// Returns the incoming VPValue with index \p Idx.
1616 VPValue *getIncomingValue(unsigned Idx) const {
1617 return getAsRecipe()->getOperand(N: Idx);
1618 }
1619
1620 /// Returns the incoming block with index \p Idx.
1621 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1622
1623 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1624 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1625
1626 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1627 /// block.
1628 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1629
1630 /// Returns the number of incoming values, also number of incoming blocks.
1631 virtual unsigned getNumIncoming() const {
1632 return getAsRecipe()->getNumOperands();
1633 }
1634
1635 /// Returns an interator range over the incoming values.
1636 VPUser::const_operand_range incoming_values() const {
1637 return make_range(x: getAsRecipe()->op_begin(),
1638 y: getAsRecipe()->op_begin() + getNumIncoming());
1639 }
1640
1641 using const_incoming_blocks_range = iterator_range<mapped_iterator<
1642 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1643
1644 /// Returns an iterator range over the incoming blocks.
1645 const_incoming_blocks_range incoming_blocks() const {
1646 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1647 return getIncomingBlock(Idx);
1648 };
1649 return map_range(C: index_range(0, getNumIncoming()), F: GetBlock);
1650 }
1651
1652 /// Returns an iterator range over pairs of incoming values and corresponding
1653 /// incoming blocks.
1654 detail::zippy<llvm::detail::zip_first, VPUser::const_operand_range,
1655 const_incoming_blocks_range>
1656 incoming_values_and_blocks() const {
1657 return zip_equal(t: incoming_values(), u: incoming_blocks());
1658 }
1659
1660 /// Removes the incoming value for \p IncomingBlock, which must be a
1661 /// predecessor.
1662 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1663
1664 /// Append \p IncomingV as an incoming value to the phi-like recipe.
1665 void addIncoming(VPValue *IncomingV) {
1666 auto *R = const_cast<VPRecipeBase *>(getAsRecipe());
1667 assert((R->getNumOperands() == 0 ||
1668 IncomingV->getScalarType() == R->getOperand(0)->getScalarType()) &&
1669 "all incoming values must have the same type");
1670 R->addOperand(Operand: IncomingV);
1671 }
1672
1673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 /// Print the recipe.
1675 void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1676#endif
1677};
1678
1679struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors {
1680 VPPhi(ArrayRef<VPValue *> Operands, const VPIRFlags &Flags, DebugLoc DL,
1681 const Twine &Name = "", Type *ResultTy = nullptr)
1682 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1683 ResultTy) {}
1684
1685 static inline bool classof(const VPUser *U) {
1686 auto *VPI = dyn_cast<VPInstruction>(Val: U);
1687 return VPI && VPI->getOpcode() == Instruction::PHI;
1688 }
1689
1690 static inline bool classof(const VPValue *V) {
1691 auto *VPI = dyn_cast<VPInstruction>(Val: V);
1692 return VPI && VPI->getOpcode() == Instruction::PHI;
1693 }
1694
1695 static inline bool classof(const VPSingleDefRecipe *SDR) {
1696 auto *VPI = dyn_cast<VPInstruction>(Val: SDR);
1697 return VPI && VPI->getOpcode() == Instruction::PHI;
1698 }
1699
1700 VPPhi *clone() override {
1701 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1702 PhiR->setUnderlyingValue(getUnderlyingValue());
1703 return PhiR;
1704 }
1705
1706 void execute(VPTransformState &State) override;
1707
1708protected:
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void printRecipe(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 const VPRecipeBase *getAsRecipe() const override { return this; }
1716};
1717
1718/// A recipe to wrap on original IR instruction not to be modified during
1719/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1720/// Expect PHIs, VPIRInstructions cannot have any operands.
1721class VPIRInstruction : public VPRecipeBase {
1722 Instruction &I;
1723
1724protected:
1725 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1726 /// subclasses may need to be created, e.g. VPIRPhi.
1727 VPIRInstruction(Instruction &I)
1728 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1729
1730public:
1731 ~VPIRInstruction() override = default;
1732
1733 /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
1734 /// VPIRInstruction.
1735 LLVM_ABI_FOR_TEST static VPIRInstruction *create(Instruction &I);
1736
1737 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1738
1739 VPIRInstruction *clone() override {
1740 auto *R = create(I);
1741 for (auto *Op : operands())
1742 R->addOperand(Operand: Op);
1743 return R;
1744 }
1745
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPIRInstruction.
1749 LLVM_ABI_FOR_TEST InstructionCost
1750 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1751
1752 Instruction &getInstruction() const { return I; }
1753
1754 bool usesScalars(const VPValue *Op) const override {
1755 assert(is_contained(operands(), Op) &&
1756 "Op must be an operand of the recipe");
1757 return true;
1758 }
1759
1760 bool usesFirstPartOnly(const VPValue *Op) const override {
1761 assert(is_contained(operands(), Op) &&
1762 "Op must be an operand of the recipe");
1763 return true;
1764 }
1765
1766 bool usesFirstLaneOnly(const VPValue *Op) const override {
1767 assert(is_contained(operands(), Op) &&
1768 "Op must be an operand of the recipe");
1769 return true;
1770 }
1771
1772protected:
1773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1774 /// Print the recipe.
1775 void printRecipe(raw_ostream &O, const Twine &Indent,
1776 VPSlotTracker &SlotTracker) const override;
1777#endif
1778};
1779
1780/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1781/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1782/// allowed, and it is used to add a new incoming value for the single
1783/// predecessor VPBB.
1784struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
1785 public VPPhiAccessors {
1786 VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}
1787
1788 static inline bool classof(const VPRecipeBase *U) {
1789 auto *R = dyn_cast<VPIRInstruction>(Val: U);
1790 return R && isa<PHINode>(Val: R->getInstruction());
1791 }
1792
1793 static inline bool classof(const VPUser *U) {
1794 auto *R = dyn_cast<VPRecipeBase>(Val: U);
1795 return R && classof(U: R);
1796 }
1797
1798 PHINode &getIRPhi() { return cast<PHINode>(Val&: getInstruction()); }
1799
1800 void execute(VPTransformState &State) override;
1801
1802protected:
1803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1804 /// Print the recipe.
1805 void printRecipe(raw_ostream &O, const Twine &Indent,
1806 VPSlotTracker &SlotTracker) const override;
1807#endif
1808
1809 const VPRecipeBase *getAsRecipe() const override { return this; }
1810};
1811
1812/// VPWidenRecipe is a recipe for producing a widened instruction using the
1813/// opcode and operands of the recipe. This recipe covers most of the
1814/// traditional vectorization cases where each recipe transforms into a
1815/// vectorized version of itself.
1816class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
1817 public VPIRMetadata {
1818 unsigned Opcode;
1819
1820public:
1821 VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands,
1822 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1823 DebugLoc DL = {})
1824 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1825 setUnderlyingValue(&I);
1826 }
1827
1828 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1829 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1830 DebugLoc DL = {})
1831 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1832 computeScalarTypeForInstruction(Opcode, Operands),
1833 Flags, DL),
1834 VPIRMetadata(Metadata), Opcode(Opcode) {}
1835
1836 ~VPWidenRecipe() override = default;
1837
1838 VPWidenRecipe *clone() override { return cloneWithOperands(NewOperands: operands()); }
1839
1840 VPWidenRecipe *cloneWithOperands(ArrayRef<VPValue *> NewOperands) {
1841 if (auto *UV = getUnderlyingValue())
1842 return new VPWidenRecipe(*cast<Instruction>(Val: UV), NewOperands, *this,
1843 *this, getDebugLoc());
1844 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1845 }
1846
1847 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1848
1849 /// Produce a widened instruction using the opcode and operands of the recipe,
1850 /// processing State.VF elements.
1851 void execute(VPTransformState &State) override;
1852
1853 /// Return the cost of this VPWidenRecipe.
1854 InstructionCost computeCost(ElementCount VF,
1855 VPCostContext &Ctx) const override;
1856
1857 unsigned getOpcode() const { return Opcode; }
1858
1859protected:
1860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1861 /// Print the recipe.
1862 void printRecipe(raw_ostream &O, const Twine &Indent,
1863 VPSlotTracker &SlotTracker) const override;
1864#endif
1865
1866 /// Returns true if the recipe only uses the first lane of operand \p Op.
1867 bool usesFirstLaneOnly(const VPValue *Op) const override {
1868 assert(is_contained(operands(), Op) &&
1869 "Op must be an operand of the recipe");
1870 return Opcode == Instruction::Select && Op == getOperand(N: 0) &&
1871 Op->isDefinedOutsideLoopRegions();
1872 }
1873};
1874
1875/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1876/// TODO: Merge with VPWidenRecipe now that type is associated to every
1877/// VPRecipeValue.
1878class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1879 /// Cast instruction opcode.
1880 Instruction::CastOps Opcode;
1881
1882public:
1883 VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1884 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1885 const VPIRMetadata &Metadata = {},
1886 DebugLoc DL = DebugLoc::getUnknown())
1887 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1888 DL),
1889 VPIRMetadata(Metadata), Opcode(Opcode) {
1890 assert(flagsValidForOpcode(Opcode) &&
1891 "Set flags not supported for the provided opcode");
1892 assert(hasRequiredFlagsForOpcode(Opcode) &&
1893 "Opcode requires specific flags to be set");
1894 setUnderlyingValue(CI);
1895 }
1896
1897 ~VPWidenCastRecipe() override = default;
1898
1899 VPWidenCastRecipe *clone() override {
1900 return new VPWidenCastRecipe(Opcode, getOperand(N: 0), getScalarType(),
1901 cast_or_null<CastInst>(Val: getUnderlyingValue()),
1902 *this, *this, getDebugLoc());
1903 }
1904
1905 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1906
1907 /// Produce widened copies of the cast.
1908 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1909
1910 /// Return the cost of this VPWidenCastRecipe.
1911 LLVM_ABI_FOR_TEST InstructionCost
1912 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1913
1914 Instruction::CastOps getOpcode() const { return Opcode; }
1915
1916protected:
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 /// Print the recipe.
1919 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1920 VPSlotTracker &SlotTracker) const override;
1921#endif
1922};
1923
1924/// A recipe for widening vector intrinsics.
1925class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
1926 /// ID of the vector intrinsic to widen.
1927 Intrinsic::ID VectorIntrinsicID;
1928
1929 /// True if the intrinsic may read from memory.
1930 bool MayReadFromMemory;
1931
1932 /// True if the intrinsic may read write to memory.
1933 bool MayWriteToMemory;
1934
1935 /// True if the intrinsic may have side-effects.
1936 bool MayHaveSideEffects;
1937
1938protected:
1939 VPWidenIntrinsicRecipe(const unsigned char SC,
1940 Intrinsic::ID VectorIntrinsicID,
1941 ArrayRef<VPValue *> CallArguments, Type *Ty,
1942 const VPIRFlags &Flags = {},
1943 const VPIRMetadata &MD = {},
1944 DebugLoc DL = DebugLoc::getUnknown())
1945 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1946 VectorIntrinsicID(VectorIntrinsicID) {
1947 LLVMContext &Ctx = Ty->getContext();
1948 AttributeSet Attrs = Intrinsic::getFnAttributes(C&: Ctx, id: VectorIntrinsicID);
1949 MemoryEffects ME = Attrs.getMemoryEffects();
1950 MayReadFromMemory = !ME.onlyWritesMemory();
1951 MayWriteToMemory = !ME.onlyReadsMemory();
1952 MayHaveSideEffects = MayWriteToMemory ||
1953 !Attrs.hasAttribute(Kind: Attribute::NoUnwind) ||
1954 !Attrs.hasAttribute(Kind: Attribute::WillReturn);
1955 }
1956
1957 /// Helper function to produce the widened intrinsic call.
1958 CallInst *createVectorCall(VPTransformState &State);
1959
1960public:
1961 VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
1962 ArrayRef<VPValue *> CallArguments, Type *Ty,
1963 const VPIRFlags &Flags = {},
1964 const VPIRMetadata &MD = {},
1965 DebugLoc DL = DebugLoc::getUnknown())
1966 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1967 Flags, DL),
1968 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1969 MayReadFromMemory(CI.mayReadFromMemory()),
1970 MayWriteToMemory(CI.mayWriteToMemory()),
1971 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1972 setUnderlyingValue(&CI);
1973 }
1974
1975 VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
1976 ArrayRef<VPValue *> CallArguments, Type *Ty,
1977 const VPIRFlags &Flags = {},
1978 const VPIRMetadata &Metadata = {},
1979 DebugLoc DL = DebugLoc::getUnknown())
1980 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1981 VectorIntrinsicID, CallArguments, Ty, Flags,
1982 Metadata, DL) {}
1983
1984 ~VPWidenIntrinsicRecipe() override = default;
1985
1986 VPWidenIntrinsicRecipe *clone() override {
1987 if (Value *CI = getUnderlyingValue())
1988 return new VPWidenIntrinsicRecipe(*cast<CallInst>(Val: CI), VectorIntrinsicID,
1989 operands(), getScalarType(), *this,
1990 *this, getDebugLoc());
1991 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1992 getScalarType(), *this, *this,
1993 getDebugLoc());
1994 }
1995
1996 static inline bool classof(const VPRecipeBase *R) {
1997 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1998 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1999 }
2000
2001 static inline bool classof(const VPUser *U) {
2002 auto *R = dyn_cast<VPRecipeBase>(Val: U);
2003 return R && classof(R);
2004 }
2005
2006 static inline bool classof(const VPValue *V) {
2007 auto *R = V->getDefiningRecipe();
2008 return R && classof(R);
2009 }
2010
2011 static inline bool classof(const VPSingleDefRecipe *R) {
2012 return classof(R: static_cast<const VPRecipeBase *>(R));
2013 }
2014
2015 /// Produce a widened version of the vector intrinsic.
2016 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
2017
2018 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
2019 static InstructionCost computeCallCost(Intrinsic::ID ID,
2020 ArrayRef<const VPValue *> Operands,
2021 const VPRecipeWithIRFlags &R,
2022 ElementCount VF, VPCostContext &Ctx);
2023
2024 /// Return the cost of this vector intrinsic.
2025 LLVM_ABI_FOR_TEST InstructionCost
2026 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2027
2028 /// Return the ID of the intrinsic.
2029 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2030
2031 /// Return to name of the intrinsic as string.
2032 StringRef getIntrinsicName() const;
2033
2034 /// Returns true if the intrinsic may read from memory.
2035 bool mayReadFromMemory() const { return MayReadFromMemory; }
2036
2037 /// Returns true if the intrinsic may write to memory.
2038 bool mayWriteToMemory() const { return MayWriteToMemory; }
2039
2040 /// Returns true if the intrinsic may have side-effects.
2041 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2042
2043 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2044
2045protected:
2046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2047 /// Print the recipe.
2048 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2049 VPSlotTracker &SlotTracker) const override;
2050#endif
2051};
2052
2053/// A recipe for widening vector memory intrinsics.
2054class VPWidenMemIntrinsicRecipe final : public VPWidenIntrinsicRecipe {
2055 /// Alignment information for this memory access.
2056 Align Alignment;
2057
2058public:
2059 // TODO: support StoreInst for strided store
2060 VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
2061 ArrayRef<VPValue *> CallArguments, Type *Ty,
2062 Align Alignment, const VPIRMetadata &MD = {},
2063 DebugLoc DL = DebugLoc::getUnknown())
2064 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2065 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2066 DL),
2067 Alignment(Alignment) {
2068 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2069 "Unexpected intrinsic");
2070 }
2071
2072 ~VPWidenMemIntrinsicRecipe() override = default;
2073
2074 VPWidenMemIntrinsicRecipe *clone() override {
2075 return new VPWidenMemIntrinsicRecipe(getVectorIntrinsicID(), operands(),
2076 getScalarType(), Alignment, *this,
2077 getDebugLoc());
2078 }
2079
2080 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2081
2082 /// Produce a widened version of the vector memory intrinsic.
2083 void execute(VPTransformState &State) override;
2084
2085 /// Helper function for computing the cost of vector memory intrinsic.
2086 static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty,
2087 bool IsMasked, Align Alignment,
2088 VPCostContext &Ctx);
2089
2090 /// Return the cost of this vector memory intrinsic.
2091 InstructionCost computeCost(ElementCount VF,
2092 VPCostContext &Ctx) const override;
2093};
2094
2095/// A recipe for widening Call instructions using library calls.
2096class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags,
2097 public VPIRMetadata {
2098 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2099 /// between a given VF and the chosen vectorized variant, so there will be a
2100 /// different VPlan for each VF with a valid variant.
2101 Function *Variant;
2102
2103public:
2104 VPWidenCallRecipe(Value *UV, Function *Variant,
2105 ArrayRef<VPValue *> CallArguments,
2106 const VPIRFlags &Flags = {},
2107 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2108 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2109 toScalarizedTy(Ty: Variant->getReturnType()), Flags,
2110 DL),
2111 VPIRMetadata(Metadata), Variant(Variant) {
2112 setUnderlyingValue(UV);
2113 assert(
2114 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2115 "last operand must be the called function");
2116 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2117 ->getReturnType() == getScalarType() &&
2118 "Scalar type must match return type of called scalar function");
2119 }
2120
2121 ~VPWidenCallRecipe() override = default;
2122
2123 VPWidenCallRecipe *clone() override {
2124 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2125 *this, *this, getDebugLoc());
2126 }
2127
2128 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2129
2130 /// Produce a widened version of the call instruction.
2131 void execute(VPTransformState &State) override;
2132
2133 /// Return the cost of this VPWidenCallRecipe.
2134 InstructionCost computeCost(ElementCount VF,
2135 VPCostContext &Ctx) const override;
2136
2137 /// Return the cost of widening a call using the vector function \p Variant.
2138 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2139
2140 Function *getCalledScalarFunction() const {
2141 return cast<Function>(Val: getOperand(N: getNumOperands() - 1)->getLiveInIRValue());
2142 }
2143
2144 operand_range args() { return drop_end(RangeOrContainer: operands()); }
2145 const_operand_range args() const { return drop_end(RangeOrContainer: operands()); }
2146
2147 /// Returns true if the recipe only uses the first lane of operand \p Op.
2148 bool usesFirstLaneOnly(const VPValue *Op) const override;
2149
2150protected:
2151#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2152 /// Print the recipe.
2153 void printRecipe(raw_ostream &O, const Twine &Indent,
2154 VPSlotTracker &SlotTracker) const override;
2155#endif
2156};
2157
2158/// A recipe representing a sequence of load -> update -> store as part of
2159/// a histogram operation. This means there may be aliasing between vector
2160/// lanes, which is handled by the llvm.experimental.vector.histogram family
2161/// of intrinsics. The only update operations currently supported are
2162/// 'add' and 'sub' where the other term is loop-invariant.
2163class VPHistogramRecipe : public VPRecipeBase, public VPIRMetadata {
2164 /// Opcode of the update operation, currently either add or sub.
2165 unsigned Opcode;
2166
2167public:
2168 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2169 const VPIRMetadata &Metadata = {},
2170 DebugLoc DL = DebugLoc::getUnknown())
2171 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2172 VPIRMetadata(Metadata), Opcode(Opcode) {}
2173
2174 ~VPHistogramRecipe() override = default;
2175
2176 VPHistogramRecipe *clone() override {
2177 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2178 }
2179
2180 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2181
2182 /// Produce a vectorized histogram operation.
2183 void execute(VPTransformState &State) override;
2184
2185 /// Return the cost of this VPHistogramRecipe.
2186 InstructionCost computeCost(ElementCount VF,
2187 VPCostContext &Ctx) const override;
2188
2189 unsigned getOpcode() const { return Opcode; }
2190
2191 /// Return the mask operand if one was provided, or a null pointer if all
2192 /// lanes should be executed unconditionally.
2193 VPValue *getMask() const {
2194 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
2195 }
2196
2197protected:
2198#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2199 /// Print the recipe
2200 void printRecipe(raw_ostream &O, const Twine &Indent,
2201 VPSlotTracker &SlotTracker) const override;
2202#endif
2203};
2204
2205/// A recipe for handling GEP instructions.
2206class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
2207 Type *SourceElementTy;
2208
2209public:
2210 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2211 const VPIRFlags &Flags = {},
2212 DebugLoc DL = DebugLoc::getUnknown(),
2213 GetElementPtrInst *UV = nullptr)
2214 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2215 Operands[0]->getScalarType(), Flags, DL),
2216 SourceElementTy(SourceElementTy) {
2217 if (UV) {
2218 setUnderlyingValue(UV);
2219 [[maybe_unused]] SmallVector<std::pair<unsigned, MDNode *>> Metadata;
2220 getMetadataToPropagate(Inst: UV, Metadata);
2221 assert(Metadata.empty() && "unexpected metadata on GEP");
2222 }
2223 }
2224
2225 ~VPWidenGEPRecipe() override = default;
2226
2227 VPWidenGEPRecipe *clone() override {
2228 return new VPWidenGEPRecipe(
2229 getSourceElementType(), operands(), *this, getDebugLoc(),
2230 cast_or_null<GetElementPtrInst>(Val: getUnderlyingValue()));
2231 }
2232
2233 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2234
2235 /// This recipe generates a GEP instruction.
2236 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2237
2238 /// Generate the gep nodes.
2239 void execute(VPTransformState &State) override;
2240
2241 Type *getSourceElementType() const { return SourceElementTy; }
2242
2243 /// Return the cost of this VPWidenGEPRecipe.
2244 InstructionCost computeCost(ElementCount VF,
2245 VPCostContext &Ctx) const override {
2246 // TODO: Compute accurate cost after retiring the legacy cost model.
2247 return 0;
2248 }
2249
2250 /// Returns true if the recipe only uses the first lane of operand \p Op.
2251 bool usesFirstLaneOnly(const VPValue *Op) const override;
2252
2253protected:
2254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2255 /// Print the recipe.
2256 void printRecipe(raw_ostream &O, const Twine &Indent,
2257 VPSlotTracker &SlotTracker) const override;
2258#endif
2259};
2260
2261/// A recipe to compute a pointer to the last element of each part of a widened
2262/// memory access for widened memory accesses of SourceElementTy. Used for
2263/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2264/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2265/// unroller otherwise.
2266class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags {
2267 Type *SourceElementTy;
2268
2269 /// The constant stride of the pointer computed by this recipe, expressed in
2270 /// units of SourceElementTy.
2271 int64_t Stride;
2272
2273public:
2274 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2275 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2276 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2277 Ptr->getScalarType(), GEPFlags, DL),
2278 SourceElementTy(SourceElementTy), Stride(Stride) {
2279 assert(Stride < 0 && "Stride must be negative");
2280 }
2281
2282 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2283
2284 Type *getSourceElementType() const { return SourceElementTy; }
2285 int64_t getStride() const { return Stride; }
2286 VPValue *getPointer() const { return getOperand(N: 0); }
2287 VPValue *getVFValue() const { return getOperand(N: 1); }
2288 VPValue *getOffset() const {
2289 return getNumOperands() == 3 ? getOperand(N: 2) : nullptr;
2290 }
2291
2292 /// Adds the offset operand to the recipe.
2293 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2294 void materializeOffset(unsigned Part = 0);
2295
2296 /// Append \p Offset as the offset operand. The offset is an integer index
2297 /// expressed in units of SourceElementTy.
2298 void addOffset(VPValue *Offset) {
2299 assert(Offset->getScalarType()->isIntegerTy() &&
2300 "offset must be an integer index");
2301 VPUser::addOperand(Operand: Offset);
2302 }
2303
2304 void execute(VPTransformState &State) override;
2305
2306 bool usesFirstLaneOnly(const VPValue *Op) const override {
2307 assert(is_contained(operands(), Op) &&
2308 "Op must be an operand of the recipe");
2309 return true;
2310 }
2311
2312 /// Return the cost of this VPVectorPointerRecipe.
2313 InstructionCost computeCost(ElementCount VF,
2314 VPCostContext &Ctx) const override {
2315 // TODO: Compute accurate cost after retiring the legacy cost model.
2316 return 0;
2317 }
2318
2319 /// Returns true if the recipe only uses the first part of operand \p Op.
2320 bool usesFirstPartOnly(const VPValue *Op) const override {
2321 assert(is_contained(operands(), Op) &&
2322 "Op must be an operand of the recipe");
2323 assert(getNumOperands() <= 2 && "must have at most two operands");
2324 return true;
2325 }
2326
2327 VPVectorEndPointerRecipe *clone() override {
2328 auto *VEPR = new VPVectorEndPointerRecipe(
2329 getPointer(), getVFValue(), getSourceElementType(), getStride(),
2330 getGEPNoWrapFlags(), getDebugLoc());
2331 if (auto *Offset = getOffset())
2332 VEPR->addOffset(Offset);
2333 return VEPR;
2334 }
2335
2336protected:
2337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2338 /// Print the recipe.
2339 void printRecipe(raw_ostream &O, const Twine &Indent,
2340 VPSlotTracker &SlotTracker) const override;
2341#endif
2342};
2343
2344/// A recipe to compute the pointers for widened memory accesses of \p
2345/// SourceElementTy, with the \p Stride expressed in units of \p
2346/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2347/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2348class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
2349 Type *SourceElementTy;
2350
2351public:
2352 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2353 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2354 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2355 ArrayRef<VPValue *>({Ptr, Stride}),
2356 Ptr->getScalarType(), GEPFlags, DL),
2357 SourceElementTy(SourceElementTy) {}
2358
2359 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2360
2361 VPValue *getStride() const { return getOperand(N: 1); }
2362
2363 VPValue *getVFxPart() const {
2364 return getNumOperands() > 2 ? getOperand(N: 2) : nullptr;
2365 }
2366
2367 /// Add the per-part offset (VFxPart) used for unrolled parts > 0.
2368 void addPerPartOffset(VPValue *VFxPart) {
2369 assert(VFxPart->getScalarType()->isIntegerTy() &&
2370 "per-part offset must be an integer index");
2371 VPUser::addOperand(Operand: VFxPart);
2372 }
2373
2374 void execute(VPTransformState &State) override;
2375
2376 Type *getSourceElementType() const { return SourceElementTy; }
2377
2378 bool usesFirstLaneOnly(const VPValue *Op) const override {
2379 assert(is_contained(operands(), Op) &&
2380 "Op must be an operand of the recipe");
2381 return true;
2382 }
2383
2384 /// Returns true if the recipe only uses the first part of operand \p Op.
2385 bool usesFirstPartOnly(const VPValue *Op) const override {
2386 assert(is_contained(operands(), Op) &&
2387 "Op must be an operand of the recipe");
2388 assert(getNumOperands() <= 2 && "must have at most two operands");
2389 return true;
2390 }
2391
2392 VPVectorPointerRecipe *clone() override {
2393 auto *Clone =
2394 new VPVectorPointerRecipe(getOperand(N: 0), SourceElementTy, getStride(),
2395 getGEPNoWrapFlags(), getDebugLoc());
2396 if (auto *VFxPart = getVFxPart())
2397 Clone->addPerPartOffset(VFxPart);
2398 return Clone;
2399 }
2400
2401 /// Return the cost of this VPHeaderPHIRecipe.
2402 InstructionCost computeCost(ElementCount VF,
2403 VPCostContext &Ctx) const override {
2404 // TODO: Compute accurate cost after retiring the legacy cost model.
2405 return 0;
2406 }
2407
2408protected:
2409#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2410 /// Print the recipe.
2411 void printRecipe(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const override;
2413#endif
2414};
2415
2416/// A pure virtual base class for all recipes modeling header phis, including
2417/// phis for first order recurrences, pointer inductions and reductions. The
2418/// start value is the first operand of the recipe and the incoming value from
2419/// the backedge is the second operand.
2420///
2421/// Inductions are modeled using the following sub-classes:
2422/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2423/// floating point inductions with arbitrary start and step values. Produces
2424/// a vector PHI per-part.
2425/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2426/// pointer induction. Produces either a vector PHI per-part or scalar values
2427/// per-lane based on the canonical induction.
2428/// * VPFirstOrderRecurrencePHIRecipe
2429/// * VPReductionPHIRecipe
2430/// * VPActiveLaneMaskPHIRecipe
2431/// * VPEVLBasedIVPHIRecipe
2432///
2433/// Note that the canonical IV is modeled as a VPRegionValue associated with
2434/// its loop region.
2435class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
2436 public VPPhiAccessors {
2437protected:
2438 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2439 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2440 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2441 Start->getScalarType(), DL) {}
2442
2443 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2444 VPValue *Start, Type *ResultTy, DebugLoc DL)
2445 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2446
2447 const VPRecipeBase *getAsRecipe() const override { return this; }
2448
2449public:
2450 ~VPHeaderPHIRecipe() override = default;
2451
2452 /// Method to support type inquiry through isa, cast, and dyn_cast.
2453 static inline bool classof(const VPRecipeBase *R) {
2454 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2455 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2456 }
2457 static inline bool classof(const VPValue *V) {
2458 return isa<VPHeaderPHIRecipe>(Val: V->getDefiningRecipe());
2459 }
2460 static inline bool classof(const VPSingleDefRecipe *R) {
2461 return isa<VPHeaderPHIRecipe>(Val: static_cast<const VPRecipeBase *>(R));
2462 }
2463
2464 /// Generate the phi nodes.
2465 void execute(VPTransformState &State) override = 0;
2466
2467 /// Return the cost of this header phi recipe.
2468 InstructionCost computeCost(ElementCount VF,
2469 VPCostContext &Ctx) const override;
2470
2471 /// Returns the start value of the phi, if one is set.
2472 VPValue *getStartValue() {
2473 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2474 }
2475 VPValue *getStartValue() const {
2476 return getNumOperands() == 0 ? nullptr : getOperand(N: 0);
2477 }
2478
2479 /// Update the start value of the recipe.
2480 void setStartValue(VPValue *V) { setOperand(I: 0, New: V); }
2481
2482 /// Returns the incoming value from the loop backedge.
2483 virtual VPValue *getBackedgeValue() {
2484 return getOperand(N: 1);
2485 }
2486
2487 /// Update the incoming value from the loop backedge.
2488 void setBackedgeValue(VPValue *V) { setOperand(I: 1, New: V); }
2489
2490 /// Add \p V as the incoming value from the loop backedge.
2491 void addBackedgeValue(VPValue *V) {
2492 assert(getNumOperands() == 1 &&
2493 "backedge value must be appended right after construction");
2494 assert(V->getScalarType() == getScalarType() &&
2495 "backedge value must have the same type as the start value");
2496 VPUser::addOperand(Operand: V);
2497 }
2498
2499protected:
2500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2501 /// Print the recipe.
2502 void printRecipe(raw_ostream &O, const Twine &Indent,
2503 VPSlotTracker &SlotTracker) const override = 0;
2504#endif
2505};
2506
2507/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2508/// VPWidenPointerInductionRecipe), providing shared functionality, including
2509/// retrieving the step value, induction descriptor and original phi node.
2510class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
2511 InductionDescriptor IndDesc;
2512
2513public:
2514 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2515 VPValue *Step, const InductionDescriptor &IndDesc,
2516 DebugLoc DL)
2517 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2518 Start->getScalarType(), DL) {}
2519
2520 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2521 VPValue *Step, const InductionDescriptor &IndDesc,
2522 Type *ResultTy, DebugLoc DL)
2523 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2524 addOperand(Operand: Step);
2525 }
2526
2527 /// After unrolling, append the splat-VF step (`VF * step`) and the value of
2528 /// the induction at the last unrolled part.
2529 void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart) {
2530 assert(LastPart->getScalarType() == getScalarType() &&
2531 "last-part value must match the induction recipe's scalar type");
2532 assert((getScalarType()->isPointerTy()
2533 ? SplatVFStep->getScalarType()->isIntegerTy()
2534 : SplatVFStep->getScalarType() == getScalarType()) &&
2535 "splat-step must match the induction type for non-pointer "
2536 "inductions, or be an integer index for pointer inductions");
2537 VPUser::addOperand(Operand: SplatVFStep);
2538 VPUser::addOperand(Operand: LastPart);
2539 }
2540
2541 static inline bool classof(const VPRecipeBase *R) {
2542 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2543 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2544 }
2545
2546 static inline bool classof(const VPValue *V) {
2547 auto *R = V->getDefiningRecipe();
2548 return R && classof(R);
2549 }
2550
2551 static inline bool classof(const VPSingleDefRecipe *R) {
2552 return classof(R: static_cast<const VPRecipeBase *>(R));
2553 }
2554
2555 void execute(VPTransformState &State) override = 0;
2556
2557 /// Returns the start value of the induction.
2558 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2559
2560 /// Returns the step value of the induction.
2561 VPValue *getStepValue() { return getOperand(N: 1); }
2562 const VPValue *getStepValue() const { return getOperand(N: 1); }
2563
2564 /// Update the step value of the recipe.
2565 void setStepValue(VPValue *V) { setOperand(I: 1, New: V); }
2566
2567 VPValue *getVFValue() { return getOperand(N: 2); }
2568 const VPValue *getVFValue() const { return getOperand(N: 2); }
2569
2570 /// Returns the number of incoming values, also number of incoming blocks.
2571 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2572 /// incoming value, its start value.
2573 unsigned getNumIncoming() const override { return 1; }
2574
2575 /// Returns the underlying PHINode if one exists, or null otherwise.
2576 PHINode *getPHINode() const {
2577 return cast_if_present<PHINode>(Val: getUnderlyingValue());
2578 }
2579
2580 /// Returns the induction descriptor for the recipe.
2581 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2582
2583 /// Returns the SCEV predicates associated with this induction.
2584 ArrayRef<const SCEVPredicate *> getNoWrapPredicates() const {
2585 return IndDesc.getNoWrapPredicates();
2586 }
2587
2588 VPValue *getBackedgeValue() override {
2589 // TODO: All operands of base recipe must exist and be at same index in
2590 // derived recipe.
2591 llvm_unreachable(
2592 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2593 }
2594
2595 /// Returns true if the recipe only uses the first lane of operand \p Op.
2596 bool usesFirstLaneOnly(const VPValue *Op) const override {
2597 assert(is_contained(operands(), Op) &&
2598 "Op must be an operand of the recipe");
2599 // The recipe creates its own wide start value, so it only requests the
2600 // first lane of the operand.
2601 // TODO: Remove once creating the start value is modeled separately.
2602 return Op == getStartValue() || Op == getStepValue();
2603 }
2604};
2605
2606/// A recipe for handling phi nodes of integer and floating-point inductions,
2607/// producing their vector values. This is an abstract recipe and must be
2608/// converted to concrete recipes before executing.
2609class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
2610 public VPIRFlags {
2611 TruncInst *Trunc;
2612
2613 // If this recipe is unrolled it will have 2 additional operands.
2614 bool isUnrolled() const { return getNumOperands() == 5; }
2615
2616public:
2617 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2618 VPValue *VF, const InductionDescriptor &IndDesc,
2619 const VPIRFlags &Flags, DebugLoc DL)
2620 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2621 Start, Step, IndDesc, DL),
2622 VPIRFlags(Flags), Trunc(nullptr) {
2623 addOperand(Operand: VF);
2624 }
2625
2626 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step,
2627 VPValue *VF, const InductionDescriptor &IndDesc,
2628 TruncInst *Trunc, const VPIRFlags &Flags,
2629 DebugLoc DL)
2630 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2631 Start, Step, IndDesc,
2632 Trunc ? Trunc->getType() : Start->getType(), DL),
2633 VPIRFlags(Flags), Trunc(Trunc) {
2634 addOperand(Operand: VF);
2635 SmallVector<std::pair<unsigned, MDNode *>> Metadata;
2636 if (Trunc)
2637 getMetadataToPropagate(Inst: Trunc, Metadata);
2638 assert(Metadata.empty() && "unexpected metadata on Trunc");
2639 }
2640
2641 ~VPWidenIntOrFpInductionRecipe() override = default;
2642
2643 VPWidenIntOrFpInductionRecipe *clone() override {
2644 return new VPWidenIntOrFpInductionRecipe(
2645 getPHINode(), getStartValue(), getStepValue(), getVFValue(),
2646 getInductionDescriptor(), Trunc, *this, getDebugLoc());
2647 }
2648
2649 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2650
2651 void execute(VPTransformState &State) override {
2652 llvm_unreachable("cannot execute this recipe, should be expanded via "
2653 "expandVPWidenIntOrFpInductionRecipe");
2654 }
2655
2656 /// Returns the start value of the induction.
2657 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
2658
2659 /// If the recipe has been unrolled, return the VPValue for the induction
2660 /// increment, otherwise return null.
2661 VPValue *getSplatVFValue() const {
2662 return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr;
2663 }
2664
2665 /// Returns the number of incoming values, also number of incoming blocks.
2666 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2667 /// incoming value, its start value.
2668 unsigned getNumIncoming() const override { return 1; }
2669
2670 /// Returns the first defined value as TruncInst, if it is one or nullptr
2671 /// otherwise.
2672 TruncInst *getTruncInst() { return Trunc; }
2673 const TruncInst *getTruncInst() const { return Trunc; }
2674
2675 /// Returns true if the induction is canonical, i.e. starting at 0 and
2676 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2677 /// same type as the canonical induction.
2678 bool isCanonical() const;
2679
2680 /// Returns the VPValue representing the value of this induction at
2681 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2682 /// take place.
2683 VPValue *getLastUnrolledPartOperand() {
2684 return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this;
2685 }
2686
2687protected:
2688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2689 /// Print the recipe.
2690 void printRecipe(raw_ostream &O, const Twine &Indent,
2691 VPSlotTracker &SlotTracker) const override;
2692#endif
2693};
2694
2695class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
2696public:
2697 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2698 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2699 /// VF*UF.
2700 VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
2701 VPValue *NumUnrolledElems,
2702 const InductionDescriptor &IndDesc, DebugLoc DL)
2703 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2704 Start, Step, IndDesc, DL) {
2705 addOperand(Operand: NumUnrolledElems);
2706 }
2707
2708 ~VPWidenPointerInductionRecipe() override = default;
2709
2710 VPWidenPointerInductionRecipe *clone() override {
2711 return new VPWidenPointerInductionRecipe(
2712 cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1),
2713 getOperand(N: 2), getInductionDescriptor(), getDebugLoc());
2714 }
2715
2716 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2717
2718 /// Generate vector values for the pointer induction.
2719 void execute(VPTransformState &State) override {
2720 llvm_unreachable("cannot execute this recipe, should be expanded via "
2721 "expandVPWidenPointerInduction");
2722 };
2723
2724 /// Returns true if only scalar values will be generated.
2725 bool onlyScalarsGenerated(bool IsScalable);
2726
2727protected:
2728#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2729 /// Print the recipe.
2730 void printRecipe(raw_ostream &O, const Twine &Indent,
2731 VPSlotTracker &SlotTracker) const override;
2732#endif
2733};
2734
2735/// A recipe for widened phis. Incoming values are operands of the recipe and
2736/// their operand index corresponds to the incoming predecessor block. If the
2737/// recipe is placed in an entry block to a (non-replicate) region, it must have
2738/// exactly 2 incoming values, the first from the predecessor of the region and
2739/// the second from the exiting block of the region.
2740class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
2741 public VPPhiAccessors {
2742 /// Name to use for the generated IR instruction for the widened phi.
2743 std::string Name;
2744
2745public:
2746 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2747 /// debug location \p DL and \p Name.
2748 VPWidenPHIRecipe(ArrayRef<VPValue *> IncomingValues,
2749 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2750 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2751 IncomingValues[0]->getScalarType(),
2752 /*UV=*/nullptr, DL),
2753 Name(Name.str()) {
2754 assert(all_of(IncomingValues,
2755 [this](VPValue *VPV) {
2756 return VPV->getScalarType() == getScalarType();
2757 }) &&
2758 "all incoming values must have the same type");
2759 }
2760
2761 VPWidenPHIRecipe *clone() override {
2762 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2763 }
2764
2765 ~VPWidenPHIRecipe() override = default;
2766
2767 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2768
2769 /// Generate the phi/select nodes.
2770 void execute(VPTransformState &State) override;
2771
2772 /// Return the cost of this VPWidenPHIRecipe.
2773 InstructionCost computeCost(ElementCount VF,
2774 VPCostContext &Ctx) const override;
2775
2776protected:
2777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2778 /// Print the recipe.
2779 void printRecipe(raw_ostream &O, const Twine &Indent,
2780 VPSlotTracker &SlotTracker) const override;
2781#endif
2782
2783 const VPRecipeBase *getAsRecipe() const override { return this; }
2784};
2785
2786/// A recipe for handling first-order recurrence phis. The start value is the
2787/// first operand of the recipe and the incoming value from the backedge is the
2788/// second operand.
2789struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
2790 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start,
2791 VPValue &BackedgeValue)
2792 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2793 &Start) {
2794 addOperand(Operand: &BackedgeValue);
2795 }
2796
2797 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2798
2799 VPFirstOrderRecurrencePHIRecipe *clone() override {
2800 return new VPFirstOrderRecurrencePHIRecipe(
2801 cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0), *getOperand(N: 1));
2802 }
2803
2804 void execute(VPTransformState &State) override;
2805
2806 /// Return the cost of this first-order recurrence phi recipe.
2807 InstructionCost computeCost(ElementCount VF,
2808 VPCostContext &Ctx) const override;
2809
2810 /// Returns true if the recipe only uses the first lane of operand \p Op.
2811 bool usesFirstLaneOnly(const VPValue *Op) const override {
2812 assert(is_contained(operands(), Op) &&
2813 "Op must be an operand of the recipe");
2814 return Op == getStartValue();
2815 }
2816
2817protected:
2818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2819 /// Print the recipe.
2820 void printRecipe(raw_ostream &O, const Twine &Indent,
2821 VPSlotTracker &SlotTracker) const override;
2822#endif
2823};
2824
2825/// Possible variants of a reduction.
2826
2827/// This reduction is ordered and in-loop.
2828struct RdxOrdered {};
2829/// This reduction is in-loop.
2830struct RdxInLoop {};
2831/// This reduction is unordered with the partial result scaled down by some
2832/// factor.
2833struct RdxUnordered {
2834 unsigned VFScaleFactor;
2835};
2836using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2837
2838inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2839 unsigned ScaleFactor) {
2840 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2841 if (Ordered)
2842 return RdxOrdered{};
2843 if (InLoop)
2844 return RdxInLoop{};
2845 return RdxUnordered{/*VFScaleFactor=*/.VFScaleFactor: ScaleFactor};
2846}
2847
2848/// A recipe for handling reduction phis. The start value is the first operand
2849/// of the recipe and the incoming value from the backedge is the second
2850/// operand.
2851class VPReductionPHIRecipe : public VPHeaderPHIRecipe, public VPIRFlags {
2852 /// The recurrence kind of the reduction.
2853 const RecurKind Kind;
2854
2855 ReductionStyle Style;
2856
2857 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2858 /// patterns for argmin/argmax).
2859 /// TODO: Also support cases where the phi itself has a single use, but its
2860 /// compare has multiple uses.
2861 bool HasUsesOutsideReductionChain;
2862
2863public:
2864 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2865 VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
2866 VPValue &BackedgeValue, ReductionStyle Style,
2867 const VPIRFlags &Flags,
2868 bool HasUsesOutsideReductionChain = false)
2869 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2870 VPIRFlags(Flags), Kind(Kind), Style(Style),
2871 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2872 addOperand(Operand: &BackedgeValue);
2873 }
2874
2875 ~VPReductionPHIRecipe() override = default;
2876
2877 VPReductionPHIRecipe *cloneWithOperands(VPValue *Start,
2878 VPValue *BackedgeValue) {
2879 return new VPReductionPHIRecipe(
2880 dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), getRecurrenceKind(),
2881 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2882 }
2883
2884 VPReductionPHIRecipe *clone() override {
2885 return cloneWithOperands(Start: getOperand(N: 0), BackedgeValue: getBackedgeValue());
2886 }
2887
2888 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2889
2890 /// Generate the phi/select nodes.
2891 void execute(VPTransformState &State) override;
2892
2893 /// Get the factor that the VF of this recipe's output should be scaled by, or
2894 /// 1 if it isn't scaled.
2895 unsigned getVFScaleFactor() const {
2896 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
2897 return Partial ? Partial->VFScaleFactor : 1;
2898 }
2899
2900 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2901 /// > 1.
2902 void setVFScaleFactor(unsigned ScaleFactor) {
2903 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2904 Style = RdxUnordered{.VFScaleFactor: ScaleFactor};
2905 }
2906
2907 /// Returns the recurrence kind of the reduction.
2908 RecurKind getRecurrenceKind() const { return Kind; }
2909
2910 /// Returns true, if the phi is part of an ordered reduction.
2911 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); }
2912
2913 /// Returns true if the phi is part of an in-loop reduction.
2914 bool isInLoop() const {
2915 return std::holds_alternative<RdxInLoop>(v: Style) ||
2916 std::holds_alternative<RdxOrdered>(v: Style);
2917 }
2918
2919 /// Returns true if the reduction outputs a vector with a scaled down VF.
2920 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2921
2922 /// Returns true, if the phi is part of a multi-use reduction.
2923 bool hasUsesOutsideReductionChain() const {
2924 return HasUsesOutsideReductionChain;
2925 }
2926
2927 /// Returns true if the recipe only uses the first lane of operand \p Op.
2928 bool usesFirstLaneOnly(const VPValue *Op) const override {
2929 assert(is_contained(operands(), Op) &&
2930 "Op must be an operand of the recipe");
2931 return isOrdered() || isInLoop();
2932 }
2933
2934protected:
2935#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2936 /// Print the recipe.
2937 void printRecipe(raw_ostream &O, const Twine &Indent,
2938 VPSlotTracker &SlotTracker) const override;
2939#endif
2940};
2941
2942/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2943/// instructions.
2944class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPRecipeWithIRFlags {
2945public:
2946 /// The blend operation is a User of the incoming values and of their
2947 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2948 /// be omitted (implied by passing an odd number of operands) in which case
2949 /// all other incoming values are merged into it.
2950 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands,
2951 const VPIRFlags &Flags, DebugLoc DL)
2952 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2953 Operands[0]->getScalarType(), Flags, DL) {
2954 assert(Operands.size() >= 2 && "Expected at least two operands!");
2955 assert(all_of(seq<unsigned>(0, getNumIncomingValues()),
2956 [this](unsigned I) {
2957 return getIncomingValue(I)->getScalarType() ==
2958 getScalarType();
2959 }) &&
2960 "all incoming values must have the same type");
2961 assert(all_of(seq<unsigned>(isNormalized(), getNumIncomingValues()),
2962 [this](unsigned I) {
2963 return getMask(I)->getScalarType()->isIntegerTy(1);
2964 }) &&
2965 "masks must be a bool");
2966 setUnderlyingValue(Phi);
2967 }
2968
2969 VPBlendRecipe *clone() override { return cloneWithOperands(NewOperands: operands()); }
2970
2971 VPBlendRecipe *cloneWithOperands(ArrayRef<VPValue *> NewOperands) {
2972 return new VPBlendRecipe(cast_or_null<PHINode>(Val: getUnderlyingValue()),
2973 NewOperands, *this, getDebugLoc());
2974 }
2975
2976 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2977
2978 /// A normalized blend is one that has an odd number of operands, whereby the
2979 /// first operand does not have an associated mask.
2980 bool isNormalized() const { return getNumOperands() % 2; }
2981
2982 /// Return the number of incoming values, taking into account when normalized
2983 /// the first incoming value will have no mask.
2984 unsigned getNumIncomingValues() const {
2985 return (getNumOperands() + isNormalized()) / 2;
2986 }
2987
2988 /// Return incoming value number \p Idx.
2989 VPValue *getIncomingValue(unsigned Idx) const {
2990 return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized());
2991 }
2992
2993 /// Return mask number \p Idx.
2994 VPValue *getMask(unsigned Idx) const {
2995 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2996 return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized());
2997 }
2998
2999 /// Set mask number \p Idx to \p V.
3000 void setMask(unsigned Idx, VPValue *V) {
3001 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3002 assert(V->getScalarType()->isIntegerTy(1) && "Mask must be an i1 (vector)");
3003 Idx == 0 ? setOperand(I: 1, New: V) : setOperand(I: Idx * 2 + !isNormalized(), New: V);
3004 }
3005
3006 void execute(VPTransformState &State) override {
3007 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
3008 }
3009
3010 /// Return the cost of this VPWidenMemoryRecipe.
3011 InstructionCost computeCost(ElementCount VF,
3012 VPCostContext &Ctx) const override;
3013
3014 /// Returns true if the recipe only uses the first lane of operand \p Op.
3015 bool usesFirstLaneOnly(const VPValue *Op) const override;
3016
3017protected:
3018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3019 /// Print the recipe.
3020 void printRecipe(raw_ostream &O, const Twine &Indent,
3021 VPSlotTracker &SlotTracker) const override;
3022#endif
3023};
3024
3025/// A common base class for interleaved memory operations.
3026/// An Interleaved memory operation is a memory access method that combines
3027/// multiple strided loads/stores into a single wide load/store with shuffles.
3028/// The first operand is the start address. The optional operands are, in order,
3029/// the stored values and the mask.
3030class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
3031 public VPIRMetadata {
3032 const InterleaveGroup<Instruction> *IG;
3033
3034 /// Indicates if the interleave group is in a conditional block and requires a
3035 /// mask.
3036 bool HasMask = false;
3037
3038 /// Indicates if gaps between members of the group need to be masked out or if
3039 /// unusued gaps can be loaded speculatively.
3040 bool NeedsMaskForGaps = false;
3041
3042protected:
3043 VPInterleaveBase(const unsigned char SC,
3044 const InterleaveGroup<Instruction> *IG,
3045 ArrayRef<VPValue *> Operands,
3046 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3047 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3048 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3049 NeedsMaskForGaps(NeedsMaskForGaps) {
3050 // TODO: extend the masked interleaved-group support to reversed access.
3051 assert((!Mask || !IG->isReverse()) &&
3052 "Reversed masked interleave-group not supported.");
3053 if (StoredValues.empty()) {
3054 for (Instruction *Inst : IG->members()) {
3055 assert(!Inst->getType()->isVoidTy() && "must have result");
3056 new VPMultiDefValue(this, Inst, Inst->getType());
3057 }
3058 } else {
3059 for (auto *SV : StoredValues)
3060 addOperand(Operand: SV);
3061 }
3062 if (Mask) {
3063 HasMask = true;
3064 addOperand(Operand: Mask);
3065 }
3066 }
3067
3068public:
3069 VPInterleaveBase *clone() override = 0;
3070
3071 static inline bool classof(const VPRecipeBase *R) {
3072 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3073 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3074 }
3075
3076 static inline bool classof(const VPUser *U) {
3077 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3078 return R && classof(R);
3079 }
3080
3081 /// Return the address accessed by this recipe.
3082 VPValue *getAddr() const {
3083 return getOperand(N: 0); // Address is the 1st, mandatory operand.
3084 }
3085
3086 /// Return the mask used by this recipe. Note that a full mask is represented
3087 /// by a nullptr.
3088 VPValue *getMask() const {
3089 // Mask is optional and the last operand.
3090 return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr;
3091 }
3092
3093 /// Return true if the access needs a mask because of the gaps.
3094 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3095
3096 const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; }
3097
3098 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3099
3100 void execute(VPTransformState &State) override {
3101 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3102 }
3103
3104 /// Return the cost of this recipe.
3105 InstructionCost computeCost(ElementCount VF,
3106 VPCostContext &Ctx) const override;
3107
3108 /// Returns true if the recipe only uses the first lane of operand \p Op.
3109 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3110
3111 /// Returns the number of stored operands of this interleave group. Returns 0
3112 /// for load interleave groups.
3113 virtual unsigned getNumStoreOperands() const = 0;
3114
3115 /// Return the VPValues stored by this interleave group. If it is a load
3116 /// interleave group, return an empty ArrayRef.
3117 ArrayRef<VPValue *> getStoredValues() const {
3118 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3119 getNumStoreOperands()};
3120 }
3121};
3122
3123/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3124/// or stores into one wide load/store and shuffles. The first operand of a
3125/// VPInterleave recipe is the address, followed by the stored values, followed
3126/// by an optional mask.
3127class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
3128public:
3129 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
3130 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3131 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3132 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3133 Mask, NeedsMaskForGaps, MD, DL) {}
3134
3135 ~VPInterleaveRecipe() override = default;
3136
3137 VPInterleaveRecipe *clone() override {
3138 return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
3139 getStoredValues(), getMask(),
3140 needsMaskForGaps(), *this, getDebugLoc());
3141 }
3142
3143 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3144
3145 /// Generate the wide load or store, and shuffles.
3146 void execute(VPTransformState &State) override;
3147
3148 bool usesFirstLaneOnly(const VPValue *Op) const override {
3149 assert(is_contained(operands(), Op) &&
3150 "Op must be an operand of the recipe");
3151 return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op);
3152 }
3153
3154 unsigned getNumStoreOperands() const override {
3155 return getNumOperands() - (getMask() ? 2 : 1);
3156 }
3157
3158protected:
3159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3160 /// Print the recipe.
3161 void printRecipe(raw_ostream &O, const Twine &Indent,
3162 VPSlotTracker &SlotTracker) const override;
3163#endif
3164};
3165
3166/// A recipe for interleaved memory operations with vector-predication
3167/// intrinsics. The first operand is the address, the second operand is the
3168/// explicit vector length. Stored values and mask are optional operands.
3169class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
3170public:
3171 VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
3172 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3173 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3174 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3175 R.getDebugLoc()) {
3176 assert(!getInterleaveGroup()->isReverse() &&
3177 "Reversed interleave-group with tail folding is not supported.");
3178 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3179 "supported for scalable vector.");
3180 }
3181
3182 ~VPInterleaveEVLRecipe() override = default;
3183
3184 VPInterleaveEVLRecipe *clone() override {
3185 llvm_unreachable("cloning not implemented yet");
3186 }
3187
3188 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3189
3190 /// The VPValue of the explicit vector length.
3191 VPValue *getEVL() const { return getOperand(N: 1); }
3192
3193 /// Generate the wide load or store, and shuffles.
3194 void execute(VPTransformState &State) override;
3195
3196 /// The recipe only uses the first lane of the address, and EVL operand.
3197 bool usesFirstLaneOnly(const VPValue *Op) const override {
3198 assert(is_contained(operands(), Op) &&
3199 "Op must be an operand of the recipe");
3200 return (Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op)) ||
3201 Op == getEVL();
3202 }
3203
3204 unsigned getNumStoreOperands() const override {
3205 return getNumOperands() - (getMask() ? 3 : 2);
3206 }
3207
3208protected:
3209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3210 /// Print the recipe.
3211 void printRecipe(raw_ostream &O, const Twine &Indent,
3212 VPSlotTracker &SlotTracker) const override;
3213#endif
3214};
3215
3216/// A recipe to represent inloop, ordered or partial reduction operations. It
3217/// performs a reduction on a vector operand into a scalar (vector in the case
3218/// of a partial reduction) value, and adds the result to a chain. The Operands
3219/// are {ChainOp, VecOp, [Condition]}.
3220class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
3221
3222 /// The recurrence kind for the reduction in question.
3223 RecurKind RdxKind;
3224 /// Whether the reduction is conditional.
3225 bool IsConditional = false;
3226 ReductionStyle Style;
3227
3228protected:
3229 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3230 FastMathFlags FMFs, Instruction *I,
3231 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3232 ReductionStyle Style, DebugLoc DL)
3233 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3234 DL),
3235 RdxKind(RdxKind), Style(Style) {
3236 assert(all_of(Operands,
3237 [this](VPValue *VPV) {
3238 return VPV->getScalarType() == getScalarType() ||
3239 (isa<VPInstruction>(VPV) &&
3240 cast<VPInstruction>(VPV)->getOpcode() ==
3241 VPInstruction::ExplicitVectorLength);
3242 }) &&
3243 "all incoming values must have the same type");
3244 if (CondOp) {
3245 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3246 "CondOp must be a bool");
3247 IsConditional = true;
3248 addOperand(Operand: CondOp);
3249 }
3250 setUnderlyingValue(I);
3251 }
3252
3253public:
3254 VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
3255 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3256 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3257 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3258 {ChainOp, VecOp}, CondOp, Style, DL) {}
3259
3260 VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
3261 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3262 ReductionStyle Style, DebugLoc DL = DebugLoc::getUnknown())
3263 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3264 {ChainOp, VecOp}, CondOp, Style, DL) {}
3265
3266 ~VPReductionRecipe() override = default;
3267
3268 VPReductionRecipe *clone() override {
3269 return new VPReductionRecipe(RdxKind, getFastMathFlagsOrNone(),
3270 getUnderlyingInstr(), getChainOp(), getVecOp(),
3271 getCondOp(), Style, getDebugLoc());
3272 }
3273
3274 static inline bool classof(const VPRecipeBase *R) {
3275 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3276 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3277 }
3278
3279 static inline bool classof(const VPUser *U) {
3280 auto *R = dyn_cast<VPRecipeBase>(Val: U);
3281 return R && classof(R);
3282 }
3283
3284 static inline bool classof(const VPValue *VPV) {
3285 const VPRecipeBase *R = VPV->getDefiningRecipe();
3286 return R && classof(R);
3287 }
3288
3289 static inline bool classof(const VPSingleDefRecipe *R) {
3290 return classof(R: static_cast<const VPRecipeBase *>(R));
3291 }
3292
3293 /// Generate the reduction in the loop.
3294 void execute(VPTransformState &State) override;
3295
3296 /// Return the cost of VPReductionRecipe.
3297 InstructionCost computeCost(ElementCount VF,
3298 VPCostContext &Ctx) const override;
3299
3300 /// Return the recurrence kind for the in-loop reduction.
3301 RecurKind getRecurrenceKind() const { return RdxKind; }
3302 /// Return true if the in-loop reduction is ordered.
3303 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(v: Style); };
3304 /// Return true if the in-loop reduction is conditional.
3305 bool isConditional() const { return IsConditional; };
3306 /// Returns true if the reduction outputs a vector with a scaled down VF.
3307 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3308 /// Returns true if the reduction is in-loop.
3309 bool isInLoop() const {
3310 return std::holds_alternative<RdxInLoop>(v: Style) ||
3311 std::holds_alternative<RdxOrdered>(v: Style);
3312 }
3313 /// The VPValue of the scalar Chain being accumulated.
3314 VPValue *getChainOp() const { return getOperand(N: 0); }
3315 /// The VPValue of the vector value to be reduced.
3316 VPValue *getVecOp() const { return getOperand(N: 1); }
3317 /// The VPValue of the condition for the block.
3318 VPValue *getCondOp() const {
3319 return isConditional() ? getOperand(N: getNumOperands() - 1) : nullptr;
3320 }
3321 /// Get the factor that the VF of this recipe's output should be scaled by, or
3322 /// 1 if it isn't scaled.
3323 unsigned getVFScaleFactor() const {
3324 auto *Partial = std::get_if<RdxUnordered>(ptr: &Style);
3325 return Partial ? Partial->VFScaleFactor : 1;
3326 }
3327
3328protected:
3329#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3330 /// Print the recipe.
3331 void printRecipe(raw_ostream &O, const Twine &Indent,
3332 VPSlotTracker &SlotTracker) const override;
3333#endif
3334};
3335
3336/// A recipe to represent inloop reduction operations with vector-predication
3337/// intrinsics, performing a reduction on a vector operand with the explicit
3338/// vector length (EVL) into a scalar value, and adding the result to a chain.
3339/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3340class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
3341public:
3342 VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
3343 DebugLoc DL = DebugLoc::getUnknown())
3344 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3345 R.getFastMathFlagsOrNone(),
3346 cast_or_null<Instruction>(Val: R.getUnderlyingValue()),
3347 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3348 getReductionStyle(/*InLoop=*/InLoop: true, Ordered: R.isOrdered(), ScaleFactor: 1),
3349 DL) {}
3350
3351 ~VPReductionEVLRecipe() override = default;
3352
3353 VPReductionEVLRecipe *clone() override {
3354 llvm_unreachable("cloning not implemented yet");
3355 }
3356
3357 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3358
3359 /// Generate the reduction in the loop
3360 void execute(VPTransformState &State) override;
3361
3362 /// The VPValue of the explicit vector length.
3363 VPValue *getEVL() const { return getOperand(N: 2); }
3364
3365 /// Returns true if the recipe only uses the first lane of operand \p Op.
3366 bool usesFirstLaneOnly(const VPValue *Op) const override {
3367 assert(is_contained(operands(), Op) &&
3368 "Op must be an operand of the recipe");
3369 return Op == getEVL();
3370 }
3371
3372protected:
3373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3374 /// Print the recipe.
3375 void printRecipe(raw_ostream &O, const Twine &Indent,
3376 VPSlotTracker &SlotTracker) const override;
3377#endif
3378};
3379
3380/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3381/// copies of the original scalar type, one per lane, instead of producing a
3382/// single copy of widened type for all lanes. If the instruction is known to be
3383/// a single scalar, only one copy will be generated.
3384class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags,
3385 public VPIRMetadata {
3386 /// Indicator if only a single replica per lane is needed.
3387 bool IsSingleScalar;
3388
3389 /// Indicator if the replicas are also predicated.
3390 bool IsPredicated;
3391
3392public:
3393 VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
3394 bool IsSingleScalar, VPValue *Mask = nullptr,
3395 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3396 DebugLoc DL = DebugLoc::getUnknown())
3397 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3398 computeScalarType(I, Operands), Flags, DL),
3399 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3400 IsPredicated(Mask) {
3401 assert((!IsSingleScalar || !I->isCast()) &&
3402 "single-scalar casts should use VPInstructionWithType");
3403 setUnderlyingValue(I);
3404 if (Mask)
3405 addOperand(Operand: Mask);
3406 }
3407
3408 ~VPReplicateRecipe() override = default;
3409
3410 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3411 /// \p Operands (excluding any predicate mask).
3412 static Type *computeScalarType(const Instruction *I,
3413 ArrayRef<VPValue *> Operands);
3414
3415 VPReplicateRecipe *clone() override { return cloneWithOperands(NewOperands: operands()); }
3416
3417 VPReplicateRecipe *cloneWithOperands(ArrayRef<VPValue *> NewOperands) {
3418 auto *Copy = new VPReplicateRecipe(
3419 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3420 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3421 Copy->transferFlags(Other&: *this);
3422 return Copy;
3423 }
3424
3425 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3426
3427 /// Generate replicas of the desired Ingredient. Replicas will be generated
3428 /// for all parts and lanes unless a specific part and lane are specified in
3429 /// the \p State.
3430 void execute(VPTransformState &State) override;
3431
3432 /// Return the cost of this VPReplicateRecipe.
3433 InstructionCost computeCost(ElementCount VF,
3434 VPCostContext &Ctx) const override;
3435
3436 /// Return the cost of scalarizing a call to \p CalledFn with argument
3437 /// operands \p ArgOps for a given \p VF.
3438 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3439 ArrayRef<const VPValue *> ArgOps,
3440 bool IsSingleScalar, ElementCount VF,
3441 VPCostContext &Ctx);
3442
3443 bool isSingleScalar() const { return IsSingleScalar; }
3444
3445 bool isPredicated() const { return IsPredicated; }
3446
3447 /// Returns true if the recipe only uses the first lane of operand \p Op.
3448 bool usesFirstLaneOnly(const VPValue *Op) const override {
3449 assert(is_contained(operands(), Op) &&
3450 "Op must be an operand of the recipe");
3451 return isSingleScalar();
3452 }
3453
3454 /// Returns true if the recipe uses scalars of operand \p Op.
3455 bool usesScalars(const VPValue *Op) const override {
3456 assert(is_contained(operands(), Op) &&
3457 "Op must be an operand of the recipe");
3458 return true;
3459 }
3460
3461 /// Return the mask of a predicated VPReplicateRecipe.
3462 VPValue *getMask() {
3463 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3464 return getOperand(N: getNumOperands() - 1);
3465 }
3466
3467 /// Return the recipe's operands, excluding the mask of a predicated recipe.
3468 operand_range operandsWithoutMask() {
3469 return isPredicated() ? drop_end(RangeOrContainer: operands()) : operands();
3470 }
3471
3472 /// Returns the number of operands, excluding the mask if the recipe is
3473 /// predicated.
3474 unsigned getNumOperandsWithoutMask() const {
3475 return getNumOperands() - isPredicated();
3476 }
3477
3478 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3479
3480protected:
3481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3482 /// Print the recipe.
3483 void printRecipe(raw_ostream &O, const Twine &Indent,
3484 VPSlotTracker &SlotTracker) const override;
3485#endif
3486};
3487
3488/// A recipe for generating conditional branches on the bits of a mask.
3489class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase {
3490public:
3491 VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
3492 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3493
3494 VPBranchOnMaskRecipe *clone() override {
3495 return new VPBranchOnMaskRecipe(getOperand(N: 0), getDebugLoc());
3496 }
3497
3498 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3499
3500 /// Generate the extraction of the appropriate bit from the block mask and the
3501 /// conditional branch.
3502 void execute(VPTransformState &State) override;
3503
3504 /// Return the cost of this VPBranchOnMaskRecipe.
3505 InstructionCost computeCost(ElementCount VF,
3506 VPCostContext &Ctx) const override;
3507
3508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3509 /// Print the recipe.
3510 void printRecipe(raw_ostream &O, const Twine &Indent,
3511 VPSlotTracker &SlotTracker) const override {
3512 O << Indent << "BRANCH-ON-MASK ";
3513 printOperands(O, SlotTracker);
3514 }
3515#endif
3516
3517 /// Returns true if the recipe uses scalars of operand \p Op.
3518 bool usesScalars(const VPValue *Op) const override {
3519 assert(is_contained(operands(), Op) &&
3520 "Op must be an operand of the recipe");
3521 return true;
3522 }
3523};
3524
3525/// A recipe to combine multiple recipes into a single 'expression' recipe,
3526/// which should be considered a single entity for cost-modeling and transforms.
3527/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3528/// expression recipes, before execute. The individual expression recipes are
3529/// completely disconnected from the def-use graph of other recipes not part of
3530/// the expression. Def-use edges between pairs of expression recipes remain
3531/// intact, whereas every edge between an expression recipe and a recipe outside
3532/// the expression is elevated to connect the non-expression recipe with the
3533/// VPExpressionRecipe itself.
3534class VPExpressionRecipe : public VPSingleDefRecipe {
3535 /// Recipes included in this VPExpressionRecipe. This could contain
3536 /// duplicates.
3537 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3538
3539 /// Temporary VPValues used for external operands of the expression, i.e.
3540 /// operands not defined by recipes in the expression.
3541 SmallVector<VPValue *> LiveInPlaceholders;
3542
3543 enum class ExpressionTypes {
3544 /// Represents an inloop extended reduction operation, performing a
3545 /// reduction on an extended vector operand into a scalar value, and adding
3546 /// the result to a chain.
3547 ExtendedReduction,
3548 /// Represents an inloop extended reduction operation, which is negated,
3549 /// then reduced before adding the result to a chain.
3550 NegatedExtendedReduction,
3551 /// Represent an inloop multiply-accumulate reduction, multiplying the
3552 /// extended vector operands, performing a reduction.add on the result, and
3553 /// adding the scalar result to a chain.
3554 ExtMulAccReduction,
3555 /// Represent an inloop multiply-accumulate reduction, multiplying the
3556 /// vector operands, performing a reduction.add on the result, and adding
3557 /// the scalar result to a chain.
3558 MulAccReduction,
3559 /// Represent an inloop multiply-accumulate reduction, multiplying the
3560 /// extended vector operands, negating the multiplication, performing a
3561 /// reduction.add on the result, and adding the scalar result to a chain.
3562 ExtNegatedMulAccReduction,
3563 };
3564
3565 /// Type of the expression.
3566 ExpressionTypes ExpressionType;
3567
3568 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3569 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3570 /// in the expression) are replaced by temporary VPValues and the original
3571 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3572 /// as needed (excluding last) to ensure they are only used by other recipes
3573 /// in the expression.
3574 VPExpressionRecipe(ExpressionTypes ExpressionType,
3575 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3576
3577public:
3578 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
3579 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3580 VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg,
3581 VPReductionRecipe *Red)
3582 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3583 {Ext, Neg, Red}) {
3584 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3585 Red->getRecurrenceKind() == RecurKind::FAdd ||
3586 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3587 "Expected an add or add-chain-with-subs reduction");
3588 if (Neg->getOpcode() == Instruction::Sub) {
3589 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(Val: getOperand(N: 1));
3590 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3591 } else
3592 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3593 }
3594 VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
3595 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3596 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3597 VPWidenRecipe *Mul, VPReductionRecipe *Red)
3598 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3599 {Ext0, Ext1, Mul, Red}) {}
3600 VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
3601 VPWidenRecipe *Mul, VPWidenRecipe *Neg,
3602 VPReductionRecipe *Red)
3603 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3604 {Ext0, Ext1, Mul, Neg, Red}) {
3605 assert((Mul->getOpcode() == Instruction::Mul ||
3606 Mul->getOpcode() == Instruction::FMul) &&
3607 "Expected a mul");
3608 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3609 Red->getRecurrenceKind() == RecurKind::FAdd ||
3610 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3611 "Expected an add or add-chain-with-subs reduction");
3612 assert(getNumOperands() >= 3 && "Expected at least three operands");
3613 if (Neg->getOpcode() == Instruction::Sub) {
3614 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(Val: getOperand(N: 2));
3615 assert(SubConst && SubConst->isZero() &&
3616 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3617 } else
3618 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3619 }
3620
3621 ~VPExpressionRecipe() override {
3622 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3623 for (auto *R : reverse(C&: ExpressionRecipes)) {
3624 if (ExpressionRecipesSeen.insert(Ptr: R).second)
3625 delete R;
3626 }
3627 for (VPValue *T : LiveInPlaceholders)
3628 delete T;
3629 }
3630
3631 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3632
3633 VPExpressionRecipe *clone() override {
3634 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3635 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3636 for (auto *R : ExpressionRecipes)
3637 NewExpressiondRecipes.push_back(Elt: R->clone());
3638 for (auto *New : NewExpressiondRecipes) {
3639 for (const auto &[Idx, Old] : enumerate(First&: ExpressionRecipes))
3640 New->replaceUsesOfWith(From: Old, To: NewExpressiondRecipes[Idx]);
3641 // Update placeholder operands in the cloned recipe to use the external
3642 // operands, to be internalized when the cloned expression is constructed.
3643 for (const auto &[Placeholder, OutsideOp] :
3644 zip(t&: LiveInPlaceholders, u: operands()))
3645 New->replaceUsesOfWith(From: Placeholder, To: OutsideOp);
3646 }
3647 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3648 }
3649
3650 /// Return the VPValue to use to infer the result type of the recipe.
3651 VPValue *getOperandOfResultType() const {
3652 unsigned OpIdx =
3653 cast<VPReductionRecipe>(Val: ExpressionRecipes.back())->isConditional() ? 2
3654 : 1;
3655 return getOperand(N: getNumOperands() - OpIdx);
3656 }
3657
3658 /// Insert the recipes of the expression back into the VPlan, directly before
3659 /// the current recipe. Leaves the expression recipe empty, which must be
3660 /// removed before codegen.
3661 void decompose();
3662
3663 unsigned getVFScaleFactor() const {
3664 auto *PR = dyn_cast<VPReductionRecipe>(Val: ExpressionRecipes.back());
3665 return PR ? PR->getVFScaleFactor() : 1;
3666 }
3667
3668 /// Method for generating code, must not be called as this recipe is abstract.
3669 void execute(VPTransformState &State) override {
3670 llvm_unreachable("recipe must be removed before execute");
3671 }
3672
3673 InstructionCost computeCost(ElementCount VF,
3674 VPCostContext &Ctx) const override;
3675
3676 /// Returns true if this expression contains recipes that may read from or
3677 /// write to memory.
3678 bool mayReadOrWriteMemory() const;
3679
3680 /// Returns true if this expression contains recipes that may have side
3681 /// effects.
3682 bool mayHaveSideEffects() const;
3683
3684 /// Returns true if this VPExpressionRecipe produces a single scalar.
3685 bool isVectorToScalar() const;
3686
3687protected:
3688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3689 /// Print the recipe.
3690 void printRecipe(raw_ostream &O, const Twine &Indent,
3691 VPSlotTracker &SlotTracker) const override;
3692#endif
3693};
3694
3695/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3696/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3697/// order to merge values that are set under such a branch and feed their uses.
3698/// The phi nodes can be scalar or vector depending on the users of the value.
3699/// This recipe works in concert with VPBranchOnMaskRecipe.
3700class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe {
3701public:
3702 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
3703 /// nodes after merging back from a Branch-on-Mask.
3704 VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
3705 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3706 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3707 ~VPPredInstPHIRecipe() override = default;
3708
3709 VPPredInstPHIRecipe *clone() override {
3710 return new VPPredInstPHIRecipe(getOperand(N: 0), getDebugLoc());
3711 }
3712
3713 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3714
3715 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3716 /// retain SSA form.
3717 void execute(VPTransformState &State) override;
3718
3719 /// Return the cost of this VPPredInstPHIRecipe.
3720 InstructionCost computeCost(ElementCount VF,
3721 VPCostContext &Ctx) const override {
3722 // TODO: Compute accurate cost after retiring the legacy cost model.
3723 return 0;
3724 }
3725
3726protected:
3727#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3728 /// Print the recipe.
3729 void printRecipe(raw_ostream &O, const Twine &Indent,
3730 VPSlotTracker &SlotTracker) const override;
3731#endif
3732};
3733
3734/// A common mixin class for widening memory operations. An optional mask can be
3735/// provided as the last operand.
3736class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPIRMetadata {
3737protected:
3738 Instruction &Ingredient;
3739
3740 /// Alignment information for this memory access.
3741 Align Alignment;
3742
3743 /// Whether the accessed addresses are consecutive.
3744 bool Consecutive;
3745
3746 /// Whether the memory access is masked.
3747 bool IsMasked = false;
3748
3749 void setMask(VPValue *Mask) {
3750 assert(!IsMasked && "cannot re-set mask");
3751 if (!Mask)
3752 return;
3753 assert(Mask->getScalarType()->isIntegerTy(1) &&
3754 "Mask must be an i1 (vector)");
3755 getAsRecipe()->addOperand(Operand: Mask);
3756 IsMasked = true;
3757 }
3758
3759 VPWidenMemoryRecipe(Instruction &I, bool Consecutive,
3760 const VPIRMetadata &Metadata)
3761 : VPIRMetadata(Metadata), Ingredient(I),
3762 Alignment(getLoadStoreAlignment(I: &I)), Consecutive(Consecutive) {}
3763
3764public:
3765 virtual ~VPWidenMemoryRecipe() = default;
3766
3767 /// Return a VPRecipeBase* to the current object.
3768 virtual VPRecipeBase *getAsRecipe() = 0;
3769 virtual const VPRecipeBase *getAsRecipe() const = 0;
3770
3771 /// Return whether the loaded-from / stored-to addresses are consecutive.
3772 bool isConsecutive() const { return Consecutive; }
3773
3774 /// Return the address accessed by this recipe.
3775 VPValue *getAddr() const { return getAsRecipe()->getOperand(N: 0); }
3776
3777 /// Returns true if the recipe is masked.
3778 bool isMasked() const { return IsMasked; }
3779
3780 /// Return the mask used by this recipe. Note that a full mask is represented
3781 /// by a nullptr.
3782 VPValue *getMask() const {
3783 // Mask is optional and therefore the last operand.
3784 const VPRecipeBase *R = getAsRecipe();
3785 return isMasked() ? R->getOperand(N: R->getNumOperands() - 1) : nullptr;
3786 }
3787
3788 /// Returns the alignment of the memory access.
3789 Align getAlign() const { return Alignment; }
3790
3791 /// Return the cost of this VPWidenMemoryRecipe.
3792 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3793
3794 Instruction &getIngredient() const { return Ingredient; }
3795};
3796
3797/// A recipe for widening load operations, using the address to load from and an
3798/// optional mask.
3799struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPSingleDefRecipe,
3800 public VPWidenMemoryRecipe {
3801 VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
3802 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3803 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3804 &Load, DL),
3805 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3806 setMask(Mask);
3807 }
3808
3809 VPWidenLoadRecipe *clone() override {
3810 return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(),
3811 getMask(), Consecutive, *this, getDebugLoc());
3812 }
3813
3814 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3815
3816 /// Generate a wide load or gather.
3817 void execute(VPTransformState &State) override;
3818
3819 /// Return the cost of this VPWidenLoadRecipe.
3820 InstructionCost computeCost(ElementCount VF,
3821 VPCostContext &Ctx) const override {
3822 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3823 }
3824
3825 /// Returns true if the recipe only uses the first lane of operand \p Op.
3826 bool usesFirstLaneOnly(const VPValue *Op) const override {
3827 assert(is_contained(operands(), Op) &&
3828 "Op must be an operand of the recipe");
3829 // Widened, consecutive loads operations only demand the first lane of
3830 // their address.
3831 return Op == getAddr() && isConsecutive();
3832 }
3833
3834protected:
3835 VPRecipeBase *getAsRecipe() override;
3836 const VPRecipeBase *getAsRecipe() const override;
3837
3838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3839 /// Print the recipe.
3840 void printRecipe(raw_ostream &O, const Twine &Indent,
3841 VPSlotTracker &SlotTracker) const override;
3842#endif
3843};
3844
3845/// A recipe for widening load operations with vector-predication intrinsics,
3846/// using the address to load from, the explicit vector length and an optional
3847/// mask.
3848struct LLVM_ABI_FOR_TEST VPWidenLoadEVLRecipe final
3849 : public VPSingleDefRecipe,
3850 public VPWidenMemoryRecipe {
3851 VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
3852 VPValue *Mask)
3853 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3854 L.getIngredient().getType(), &L.getIngredient(),
3855 L.getDebugLoc()),
3856 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3857 setMask(Mask);
3858 }
3859
3860 VPWidenLoadEVLRecipe *clone() override {
3861 llvm_unreachable("cloning not supported");
3862 }
3863
3864 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3865
3866 /// Return the EVL operand.
3867 VPValue *getEVL() const { return getOperand(N: 1); }
3868
3869 /// Generate the wide load or gather.
3870 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3871
3872 /// Return the cost of this VPWidenLoadEVLRecipe.
3873 LLVM_ABI_FOR_TEST InstructionCost
3874 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3875
3876 /// Returns true if the recipe only uses the first lane of operand \p Op.
3877 bool usesFirstLaneOnly(const VPValue *Op) const override {
3878 assert(is_contained(operands(), Op) &&
3879 "Op must be an operand of the recipe");
3880 // Widened loads only demand the first lane of EVL and consecutive loads
3881 // only demand the first lane of their address.
3882 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3883 }
3884
3885protected:
3886 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3887 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3888
3889#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3890 /// Print the recipe.
3891 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3892 VPSlotTracker &SlotTracker) const override;
3893#endif
3894};
3895
3896/// A recipe for widening store operations, using the stored value, the address
3897/// to store to and an optional mask.
3898struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPRecipeBase,
3899 public VPWidenMemoryRecipe {
3900 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3901 VPValue *Mask, bool Consecutive,
3902 const VPIRMetadata &Metadata, DebugLoc DL)
3903 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3904 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3905 setMask(Mask);
3906 }
3907
3908 VPWidenStoreRecipe *clone() override {
3909 return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(),
3910 getStoredValue(), getMask(), Consecutive,
3911 *this, getDebugLoc());
3912 }
3913
3914 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3915
3916 /// Return the value stored by this recipe.
3917 VPValue *getStoredValue() const { return getOperand(N: 1); }
3918
3919 /// Generate a wide store or scatter.
3920 void execute(VPTransformState &State) override;
3921
3922 /// Return the cost of this VPWidenStoreRecipe.
3923 InstructionCost computeCost(ElementCount VF,
3924 VPCostContext &Ctx) const override {
3925 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3926 }
3927
3928 /// Returns true if the recipe only uses the first lane of operand \p Op.
3929 bool usesFirstLaneOnly(const VPValue *Op) const override {
3930 assert(is_contained(operands(), Op) &&
3931 "Op must be an operand of the recipe");
3932 // Widened, consecutive stores only demand the first lane of their address,
3933 // unless the same operand is also stored.
3934 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3935 }
3936
3937protected:
3938 VPRecipeBase *getAsRecipe() override;
3939 const VPRecipeBase *getAsRecipe() const override;
3940
3941#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3942 /// Print the recipe.
3943 void printRecipe(raw_ostream &O, const Twine &Indent,
3944 VPSlotTracker &SlotTracker) const override;
3945#endif
3946};
3947
3948/// A recipe for widening store operations with vector-predication intrinsics,
3949/// using the value to store, the address to store to, the explicit vector
3950/// length and an optional mask.
3951struct LLVM_ABI_FOR_TEST VPWidenStoreEVLRecipe final
3952 : public VPRecipeBase,
3953 public VPWidenMemoryRecipe {
3954 VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
3955 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3956 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3957 S.getDebugLoc()),
3958 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3959 setMask(Mask);
3960 }
3961
3962 VPWidenStoreEVLRecipe *clone() override {
3963 llvm_unreachable("cloning not supported");
3964 }
3965
3966 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3967
3968 /// Return the address accessed by this recipe.
3969 VPValue *getStoredValue() const { return getOperand(N: 1); }
3970
3971 /// Return the EVL operand.
3972 VPValue *getEVL() const { return getOperand(N: 2); }
3973
3974 /// Generate the wide store or scatter.
3975 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3976
3977 /// Return the cost of this VPWidenStoreEVLRecipe.
3978 LLVM_ABI_FOR_TEST InstructionCost
3979 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3980
3981 /// Returns true if the recipe only uses the first lane of operand \p Op.
3982 bool usesFirstLaneOnly(const VPValue *Op) const override {
3983 assert(is_contained(operands(), Op) &&
3984 "Op must be an operand of the recipe");
3985 if (Op == getEVL()) {
3986 assert(getStoredValue() != Op && "unexpected store of EVL");
3987 return true;
3988 }
3989 // Widened, consecutive memory operations only demand the first lane of
3990 // their address, unless the same operand is also stored. That latter can
3991 // happen with opaque pointers.
3992 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3993 }
3994
3995protected:
3996 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3997 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3998
3999#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4000 /// Print the recipe.
4001 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4002 VPSlotTracker &SlotTracker) const override;
4003#endif
4004};
4005
4006/// Recipe to expand a SCEV expression.
4007class VPExpandSCEVRecipe : public VPSingleDefRecipe {
4008 const SCEV *Expr;
4009
4010public:
4011 VPExpandSCEVRecipe(const SCEV *Expr);
4012
4013 ~VPExpandSCEVRecipe() override = default;
4014
4015 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
4016
4017 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
4018
4019 void execute(VPTransformState &State) override {
4020 llvm_unreachable("SCEV expressions must be expanded before final execute");
4021 }
4022
4023 /// Return the cost of this VPExpandSCEVRecipe.
4024 InstructionCost computeCost(ElementCount VF,
4025 VPCostContext &Ctx) const override {
4026 // TODO: Compute accurate cost after retiring the legacy cost model.
4027 return 0;
4028 }
4029
4030 const SCEV *getSCEV() const { return Expr; }
4031
4032protected:
4033#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4034 /// Print the recipe.
4035 void printRecipe(raw_ostream &O, const Twine &Indent,
4036 VPSlotTracker &SlotTracker) const override;
4037#endif
4038};
4039
4040/// A recipe for generating the active lane mask for the vector loop that is
4041/// used to predicate the vector operations.
4042class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
4043public:
4044 VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
4045 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
4046 StartMask, DL) {}
4047
4048 ~VPActiveLaneMaskPHIRecipe() override = default;
4049
4050 VPActiveLaneMaskPHIRecipe *clone() override {
4051 auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc());
4052 if (getNumOperands() == 2)
4053 R->addBackedgeValue(V: getOperand(N: 1));
4054 return R;
4055 }
4056
4057 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4058
4059 /// Generate the active lane mask phi of the vector loop.
4060 void execute(VPTransformState &State) override;
4061
4062protected:
4063#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4064 /// Print the recipe.
4065 void printRecipe(raw_ostream &O, const Twine &Indent,
4066 VPSlotTracker &SlotTracker) const override;
4067#endif
4068};
4069
4070/// A recipe for generating the phi node tracking the current scalar iteration
4071/// index. It starts at the start value of the canonical induction and gets
4072/// incremented by the number of scalar iterations processed by the vector loop
4073/// iteration. The increment does not have to be loop invariant.
4074class VPCurrentIterationPHIRecipe : public VPHeaderPHIRecipe {
4075public:
4076 VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
4077 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4078 StartIV, DL) {}
4079
4080 ~VPCurrentIterationPHIRecipe() override = default;
4081
4082 VPCurrentIterationPHIRecipe *clone() override {
4083 llvm_unreachable("cloning not implemented yet");
4084 }
4085
4086 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4087
4088 void execute(VPTransformState &State) override {
4089 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4090 "scalar phi recipe");
4091 }
4092
4093 /// Return the cost of this VPCurrentIterationPHIRecipe.
4094 InstructionCost computeCost(ElementCount VF,
4095 VPCostContext &Ctx) const override {
4096 // For now, match the behavior of the legacy cost model.
4097 return 0;
4098 }
4099
4100 /// Returns true if the recipe only uses the first lane of operand \p Op.
4101 bool usesFirstLaneOnly(const VPValue *Op) const override {
4102 assert(is_contained(operands(), Op) &&
4103 "Op must be an operand of the recipe");
4104 return true;
4105 }
4106
4107protected:
4108#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4109 /// Print the recipe.
4110 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4111 VPSlotTracker &SlotTracker) const override;
4112#endif
4113};
4114
4115/// A Recipe for widening the canonical induction variable of the vector loop.
4116/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4117/// is added during unrolling.
4118class VPWidenCanonicalIVRecipe : public VPRecipeWithIRFlags {
4119public:
4120 VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV,
4121 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4122 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4123 CanonicalIV->getType(), Flags) {}
4124
4125 ~VPWidenCanonicalIVRecipe() override = default;
4126
4127 VPWidenCanonicalIVRecipe *clone() override {
4128 auto *WideCanIV =
4129 new VPWidenCanonicalIVRecipe(getCanonicalIV(), getNoWrapFlags());
4130 if (VPValue *Step = getStepValue())
4131 WideCanIV->addPerPartStep(Step);
4132 return WideCanIV;
4133 }
4134
4135 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4136
4137 void execute(VPTransformState &State) override {
4138 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4139 }
4140
4141 /// Return the cost of this VPWidenCanonicalIVPHIRecipe.
4142 InstructionCost computeCost(ElementCount VF,
4143 VPCostContext &Ctx) const override {
4144 // TODO: Compute accurate cost after retiring the legacy cost model.
4145 return 0;
4146 }
4147
4148 /// Return the canonical IV being widened.
4149 VPRegionValue *getCanonicalIV() const {
4150 return cast<VPRegionValue>(Val: getOperand(N: 0));
4151 }
4152
4153 VPValue *getStepValue() const {
4154 return getNumOperands() == 2 ? getOperand(N: 1) : nullptr;
4155 }
4156
4157 /// Add the per-part step (VF * Part) used for unrolled parts.
4158 void addPerPartStep(VPValue *Step) {
4159 assert(Step->getScalarType() == getScalarType() &&
4160 "per-part step must have the same type as the canonical IV");
4161 VPUser::addOperand(Operand: Step);
4162 }
4163
4164protected:
4165#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4166 /// Print the recipe.
4167 void printRecipe(raw_ostream &O, const Twine &Indent,
4168 VPSlotTracker &SlotTracker) const override;
4169#endif
4170};
4171
4172/// A recipe for converting the input value \p IV value to the corresponding
4173/// value of an IV with different start and step values, using Start + IV *
4174/// Step.
4175class VPDerivedIVRecipe : public VPSingleDefRecipe {
4176 /// Kind of the induction.
4177 const InductionDescriptor::InductionKind Kind;
4178 /// If not nullptr, the floating point induction binary operator. Must be set
4179 /// for floating point inductions.
4180 const FPMathOperator *FPBinOp;
4181
4182public:
4183 VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start,
4184 VPValue *CanonicalIV, VPValue *Step)
4185 : VPDerivedIVRecipe(
4186 IndDesc.getKind(),
4187 dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()),
4188 Start, CanonicalIV, Step) {}
4189
4190 VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
4191 const FPMathOperator *FPBinOp, VPIRValue *Start,
4192 VPValue *IV, VPValue *Step)
4193 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4194 Start->getScalarType(), nullptr),
4195 Kind(Kind), FPBinOp(FPBinOp) {}
4196
4197 ~VPDerivedIVRecipe() override = default;
4198
4199 VPDerivedIVRecipe *clone() override {
4200 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1),
4201 getStepValue());
4202 }
4203
4204 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4205
4206 void execute(VPTransformState &State) override {
4207 llvm_unreachable("Expected prior expansion of this recipe");
4208 }
4209
4210 /// Return the cost of this VPDerivedIVRecipe.
4211 InstructionCost computeCost(ElementCount VF,
4212 VPCostContext &Ctx) const override;
4213
4214 VPIRValue *getStartValue() const { return cast<VPIRValue>(Val: getOperand(N: 0)); }
4215 VPValue *getIndex() const { return getOperand(N: 1); }
4216 VPValue *getStepValue() const { return getOperand(N: 2); }
4217 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4218 InductionDescriptor::InductionKind getInductionKind() const { return Kind; }
4219
4220 /// Returns true if the recipe only uses the first lane of operand \p Op.
4221 bool usesFirstLaneOnly(const VPValue *Op) const override {
4222 assert(is_contained(operands(), Op) &&
4223 "Op must be an operand of the recipe");
4224 return true;
4225 }
4226
4227protected:
4228#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4229 /// Print the recipe.
4230 void printRecipe(raw_ostream &O, const Twine &Indent,
4231 VPSlotTracker &SlotTracker) const override;
4232#endif
4233};
4234
4235/// A recipe for handling phi nodes of integer and floating-point inductions,
4236/// producing their scalar values. Before unrolling by UF the recipe represents
4237/// the VF*UF scalar values to be produced, or UF scalar values if only first
4238/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4239/// operand StartIndex to all unroll parts except part 0, as the recipe
4240/// represents the VF scalar values (this number of values is taken from
4241/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4242class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
4243 Instruction::BinaryOps InductionOpcode;
4244
4245public:
4246 VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
4247 Instruction::BinaryOps Opcode, FastMathFlags FMFs,
4248 DebugLoc DL)
4249 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4250 IV->getScalarType(), FMFs, DL),
4251 InductionOpcode(Opcode) {}
4252
4253 VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
4254 VPValue *Step, VPValue *VF,
4255 DebugLoc DL = DebugLoc::getUnknown())
4256 : VPScalarIVStepsRecipe(
4257 IV, Step, VF, IndDesc.getInductionOpcode(),
4258 dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp())
4259 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4260 : FastMathFlags(),
4261 DL) {}
4262
4263 ~VPScalarIVStepsRecipe() override = default;
4264
4265 VPScalarIVStepsRecipe *clone() override {
4266 auto *NewR = new VPScalarIVStepsRecipe(
4267 getOperand(N: 0), getOperand(N: 1), getOperand(N: 2), InductionOpcode,
4268 getFastMathFlagsOrNone(), getDebugLoc());
4269 if (VPValue *StartIndex = getStartIndex())
4270 NewR->setStartIndex(StartIndex);
4271 return NewR;
4272 }
4273
4274 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4275
4276 /// Generate the scalarized versions of the phi node as needed by their users.
4277 void execute(VPTransformState &State) override;
4278
4279 /// Return the cost of this VPScalarIVStepsRecipe.
4280 InstructionCost computeCost(ElementCount VF,
4281 VPCostContext &Ctx) const override;
4282
4283 VPValue *getStepValue() const { return getOperand(N: 1); }
4284
4285 /// Return the number of scalars to produce per unroll part, used to compute
4286 /// StartIndex during unrolling.
4287 VPValue *getVFValue() const { return getOperand(N: 2); }
4288
4289 /// Return the StartIndex, or null if known to be zero, valid only after
4290 /// unrolling.
4291 VPValue *getStartIndex() const {
4292 return getNumOperands() == 4 ? getOperand(N: 3) : nullptr;
4293 }
4294
4295 /// Set or add the StartIndex operand.
4296 void setStartIndex(VPValue *StartIndex) {
4297 if (getNumOperands() == 4)
4298 setOperand(I: 3, New: StartIndex);
4299 else
4300 addOperand(Operand: StartIndex);
4301 }
4302
4303 /// Returns true if the recipe only uses the first lane of operand \p Op.
4304 bool usesFirstLaneOnly(const VPValue *Op) const override {
4305 assert(is_contained(operands(), Op) &&
4306 "Op must be an operand of the recipe");
4307 return true;
4308 }
4309
4310 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4311
4312protected:
4313#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4314 /// Print the recipe.
4315 void printRecipe(raw_ostream &O, const Twine &Indent,
4316 VPSlotTracker &SlotTracker) const override;
4317#endif
4318};
4319
4320/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4321/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4322/// VPIRMetadata).
4323namespace vpdetail {
4324template <typename VPMixin, typename... RecipeTys>
4325struct CastInfoMixinImpl
4326 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4327 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4328 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4329 "Each type in RecipeTys must derive from VPMixin");
4330
4331 /// Used by isa.
4332 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4333
4334 /// Used by cast.
4335 static VPMixin *doCast(VPRecipeBase *R) {
4336 VPMixin *Out = nullptr;
4337 ((Out = dyn_cast<RecipeTys>(R)) || ...);
4338 assert(Out && "Illegal recipe for cast");
4339 return Out;
4340 }
4341 static VPMixin *castFailed() { return nullptr; }
4342};
4343} // namespace vpdetail
4344
4345/// Support casting from VPRecipeBase -> VPPhiAccessors.
4346template <>
4347struct CastInfo<VPPhiAccessors, VPRecipeBase *>
4348 : vpdetail::CastInfoMixinImpl<VPPhiAccessors, VPPhi, VPIRPhi,
4349 VPWidenPHIRecipe, VPHeaderPHIRecipe> {};
4350
4351template <>
4352struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
4353 : public ConstStrippingForwardingCast<
4354 VPPhiAccessors, const VPRecipeBase *,
4355 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4356template <>
4357struct CastInfo<VPPhiAccessors, VPRecipeBase>
4358 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4359 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4360
4361/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4362template <>
4363struct CastInfo<VPWidenMemoryRecipe, VPRecipeBase *>
4364 : vpdetail::CastInfoMixinImpl<VPWidenMemoryRecipe, VPWidenLoadRecipe,
4365 VPWidenLoadEVLRecipe, VPWidenStoreRecipe,
4366 VPWidenStoreEVLRecipe> {};
4367template <>
4368struct CastInfo<VPWidenMemoryRecipe, const VPRecipeBase *>
4369 : public ConstStrippingForwardingCast<
4370 VPWidenMemoryRecipe, const VPRecipeBase *,
4371 CastInfo<VPWidenMemoryRecipe, VPRecipeBase *>> {};
4372
4373/// Support casting from VPRecipeBase -> VPIRMetadata.
4374template <>
4375struct CastInfo<VPIRMetadata, VPRecipeBase *>
4376 : vpdetail::CastInfoMixinImpl<
4377 VPIRMetadata, VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
4378 VPWidenIntrinsicRecipe, VPWidenCallRecipe, VPReplicateRecipe,
4379 VPInterleaveBase, VPWidenMemoryRecipe, VPHistogramRecipe> {};
4380
4381template <>
4382struct CastInfo<VPIRMetadata, const VPRecipeBase *>
4383 : public ConstStrippingForwardingCast<
4384 VPIRMetadata, const VPRecipeBase *,
4385 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4386template <>
4387struct CastInfo<VPIRMetadata, VPRecipeBase>
4388 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4389 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4390
4391/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4392/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4393/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4394class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4395 friend class VPlan;
4396
4397 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4398 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4399 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4400 if (Recipe)
4401 appendRecipe(Recipe);
4402 }
4403
4404public:
4405 using RecipeListTy = iplist<VPRecipeBase>;
4406
4407protected:
4408 /// The VPRecipes held in the order of output instructions to generate.
4409 RecipeListTy Recipes;
4410
4411 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4412 : VPBlockBase(BlockSC, Name.str()) {}
4413
4414public:
4415 ~VPBasicBlock() override {
4416 while (!Recipes.empty())
4417 Recipes.pop_back();
4418 }
4419
4420 /// Instruction iterators...
4421 using iterator = RecipeListTy::iterator;
4422 using const_iterator = RecipeListTy::const_iterator;
4423 using reverse_iterator = RecipeListTy::reverse_iterator;
4424 using const_reverse_iterator = RecipeListTy::const_reverse_iterator;
4425
4426 //===--------------------------------------------------------------------===//
4427 /// Recipe iterator methods
4428 ///
4429 inline iterator begin() { return Recipes.begin(); }
4430 inline const_iterator begin() const { return Recipes.begin(); }
4431 inline iterator end() { return Recipes.end(); }
4432 inline const_iterator end() const { return Recipes.end(); }
4433
4434 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4435 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4436 inline reverse_iterator rend() { return Recipes.rend(); }
4437 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4438
4439 inline size_t size() const { return Recipes.size(); }
4440 inline bool empty() const { return Recipes.empty(); }
4441 inline const VPRecipeBase &front() const { return Recipes.front(); }
4442 inline VPRecipeBase &front() { return Recipes.front(); }
4443 inline const VPRecipeBase &back() const { return Recipes.back(); }
4444 inline VPRecipeBase &back() { return Recipes.back(); }
4445
4446 /// Returns a reference to the list of recipes.
4447 RecipeListTy &getRecipeList() { return Recipes; }
4448
4449 /// Returns a pointer to a member of the recipe list.
4450 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4451 return &VPBasicBlock::Recipes;
4452 }
4453
4454 /// Method to support type inquiry through isa, cast, and dyn_cast.
4455 static inline bool classof(const VPBlockBase *V) {
4456 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4457 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4458 }
4459
4460 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4461 assert(Recipe && "No recipe to append.");
4462 assert(!Recipe->Parent && "Recipe already in VPlan");
4463 Recipe->Parent = this;
4464 Recipes.insert(where: InsertPt, New: Recipe);
4465 }
4466
4467 /// Augment the existing recipes of a VPBasicBlock with an additional
4468 /// \p Recipe as the last recipe.
4469 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, InsertPt: end()); }
4470
4471 /// The method which generates the output IR instructions that correspond to
4472 /// this VPBasicBlock, thereby "executing" the VPlan.
4473 void execute(VPTransformState *State) override;
4474
4475 /// Return the cost of this VPBasicBlock.
4476 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4477
4478 /// Return the position of the first non-phi node recipe in the block.
4479 iterator getFirstNonPhi();
4480
4481 /// Returns an iterator range over the PHI-like recipes in the block.
4482 iterator_range<iterator> phis() {
4483 return make_range(x: begin(), y: getFirstNonPhi());
4484 }
4485
4486 /// Split current block at \p SplitAt by inserting a new block between the
4487 /// current block and its successors and moving all recipes starting at
4488 /// SplitAt to the new block. Returns the new block.
4489 VPBasicBlock *splitAt(iterator SplitAt);
4490
4491 VPRegionBlock *getEnclosingLoopRegion();
4492 const VPRegionBlock *getEnclosingLoopRegion() const;
4493
4494#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4495 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
4496 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
4497 ///
4498 /// Note that the numbering is applied to the whole VPlan, so printing
4499 /// individual blocks is consistent with the whole VPlan printing.
4500 void print(raw_ostream &O, const Twine &Indent,
4501 VPSlotTracker &SlotTracker) const override;
4502 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4503#endif
4504
4505 /// If the block has multiple successors, return the branch recipe terminating
4506 /// the block. If there are no or only a single successor, return nullptr;
4507 VPRecipeBase *getTerminator();
4508 const VPRecipeBase *getTerminator() const;
4509
4510 /// Returns true if the block is exiting it's parent region.
4511 bool isExiting() const;
4512
4513 /// Clone the current block and it's recipes, without updating the operands of
4514 /// the cloned recipes.
4515 VPBasicBlock *clone() override;
4516
4517 /// Returns the predecessor block at index \p Idx with the predecessors as per
4518 /// the corresponding plain CFG. If the block is an entry block to a region,
4519 /// the first predecessor is the single predecessor of a region, and the
4520 /// second predecessor is the exiting block of the region.
4521 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4522
4523protected:
4524 /// Execute the recipes in the IR basic block \p BB.
4525 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4526
4527 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4528 /// generated for this VPBB.
4529 void connectToPredecessors(VPTransformState &State);
4530
4531private:
4532 /// Create an IR BasicBlock to hold the output instructions generated by this
4533 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4534 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4535};
4536
4537inline const VPBasicBlock *
4538VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
4539 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4540}
4541
4542/// A special type of VPBasicBlock that wraps an existing IR basic block.
4543/// Recipes of the block get added before the first non-phi instruction in the
4544/// wrapped block.
4545/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4546/// preheader block.
4547class VPIRBasicBlock : public VPBasicBlock {
4548 friend class VPlan;
4549
4550 BasicBlock *IRBB;
4551
4552 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4553 VPIRBasicBlock(BasicBlock *IRBB)
4554 : VPBasicBlock(VPIRBasicBlockSC,
4555 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4556 IRBB(IRBB) {}
4557
4558public:
4559 ~VPIRBasicBlock() override = default;
4560
4561 static inline bool classof(const VPBlockBase *V) {
4562 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4563 }
4564
4565 /// The method which generates the output IR instructions that correspond to
4566 /// this VPBasicBlock, thereby "executing" the VPlan.
4567 void execute(VPTransformState *State) override;
4568
4569 VPIRBasicBlock *clone() override;
4570
4571 BasicBlock *getIRBasicBlock() const { return IRBB; }
4572};
4573
4574/// Track information about the canonical IV value of a region.
4575/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4576class VPCanonicalIVInfo {
4577 /// VPRegionValue for the canonical IV, whose allocation is managed by
4578 /// VPCanonicalIVInfo.
4579 std::unique_ptr<VPRegionValue> CanIV;
4580
4581 /// Whether the increment of the canonical IV may unsigned wrap or not.
4582 bool HasNUW = true;
4583
4584public:
4585 VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
4586 : CanIV(std::make_unique<VPRegionValue>(args&: Ty, args&: DL, args&: Region)) {}
4587
4588 VPRegionValue *getRegionValue() { return CanIV.get(); }
4589 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4590
4591 bool hasNUW() const { return HasNUW; }
4592
4593 void clearNUW() { HasNUW = false; }
4594};
4595
4596/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4597/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4598/// A VPRegionBlock may indicate that its contents are to be replicated several
4599/// times. This is designed to support predicated scalarization, in which a
4600/// scalar if-then code structure needs to be generated VF * UF times. Having
4601/// this replication indicator helps to keep a single model for multiple
4602/// candidate VF's. The actual replication takes place only once the desired VF
4603/// and UF have been determined.
4604class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4605 friend class VPlan;
4606
4607 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4608 VPBlockBase *Entry;
4609
4610 /// Hold the Single Exiting block of the SESE region modelled by the
4611 /// VPRegionBlock.
4612 VPBlockBase *Exiting;
4613
4614 /// Holds the Canonical IV of the loop region along with additional
4615 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4616 /// Loop regions retain their canonical IVs until they are dissolved, even if
4617 /// the canonical IV has no users.
4618 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4619
4620 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4621 /// VPRegionBlocks.
4622 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4623 const std::string &Name = "")
4624 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4625 if (Entry) {
4626 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4627 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4628 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4629 Entry->setParent(this);
4630 Exiting->setParent(this);
4631 }
4632 }
4633
4634 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4635 VPBlockBase *Exiting, const std::string &Name = "")
4636 : VPRegionBlock(Entry, Exiting, Name) {
4637 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(args&: CanIVTy, args&: DL, args: this);
4638 }
4639
4640public:
4641 ~VPRegionBlock() override = default;
4642
4643 /// Method to support type inquiry through isa, cast, and dyn_cast.
4644 static inline bool classof(const VPBlockBase *V) {
4645 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4646 }
4647
4648 const VPBlockBase *getEntry() const { return Entry; }
4649 VPBlockBase *getEntry() { return Entry; }
4650
4651 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4652 /// EntryBlock must have no predecessors.
4653 void setEntry(VPBlockBase *EntryBlock) {
4654 assert(!EntryBlock->hasPredecessors() &&
4655 "Entry block cannot have predecessors.");
4656 Entry = EntryBlock;
4657 EntryBlock->setParent(this);
4658 }
4659
4660 const VPBlockBase *getExiting() const { return Exiting; }
4661 VPBlockBase *getExiting() { return Exiting; }
4662
4663 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4664 /// ExitingBlock must have no successors.
4665 void setExiting(VPBlockBase *ExitingBlock) {
4666 assert(!ExitingBlock->hasSuccessors() &&
4667 "Exit block cannot have successors.");
4668 Exiting = ExitingBlock;
4669 ExitingBlock->setParent(this);
4670 }
4671
4672 /// Returns the pre-header VPBasicBlock of the loop region.
4673 VPBasicBlock *getPreheaderVPBB() {
4674 assert(!isReplicator() && "should only get pre-header of loop regions");
4675 return getSinglePredecessor()->getExitingBasicBlock();
4676 }
4677
4678 /// An indicator whether this region is to generate multiple replicated
4679 /// instances of output IR corresponding to its VPBlockBases.
4680 bool isReplicator() const { return !CanIVInfo; }
4681
4682 /// The method which generates the output IR instructions that correspond to
4683 /// this VPRegionBlock, thereby "executing" the VPlan.
4684 void execute(VPTransformState *State) override;
4685
4686 // Return the cost of this region.
4687 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4688
4689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4690 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4691 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4692 /// consequtive numbers.
4693 ///
4694 /// Note that the numbering is applied to the whole VPlan, so printing
4695 /// individual regions is consistent with the whole VPlan printing.
4696 void print(raw_ostream &O, const Twine &Indent,
4697 VPSlotTracker &SlotTracker) const override;
4698 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4699#endif
4700
4701 /// Clone all blocks in the single-entry single-exit region of the block and
4702 /// their recipes without updating the operands of the cloned recipes.
4703 VPRegionBlock *clone() override;
4704
4705 /// Remove the current region from its VPlan, connecting its predecessor to
4706 /// its entry, and its exiting block to its successor.
4707 void dissolveToCFGLoop();
4708
4709 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4710 /// a new increment before the terminator and return it. The canonical IV
4711 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4712 VPInstruction *getOrCreateCanonicalIVIncrement();
4713
4714 /// Return the canonical induction variable of the region, null for
4715 /// replicating regions.
4716 VPRegionValue *getCanonicalIV() {
4717 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4718 }
4719 const VPRegionValue *getCanonicalIV() const {
4720 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4721 }
4722
4723 /// Return the type of the canonical IV for loop regions.
4724 Type *getCanonicalIVType() const {
4725 return CanIVInfo->getRegionValue()->getType();
4726 }
4727
4728 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4729 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4730
4731 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4732 void clearCanonicalIVNUW(VPInstruction *Increment) {
4733 assert(Increment && "Must provide increment to clear");
4734 Increment->dropPoisonGeneratingFlags();
4735 CanIVInfo->clearNUW();
4736 }
4737};
4738
4739inline VPRegionBlock *VPRecipeBase::getRegion() {
4740 return getParent()->getParent();
4741}
4742
4743inline const VPRegionBlock *VPRecipeBase::getRegion() const {
4744 return getParent()->getParent();
4745}
4746
4747/// VPlan models a candidate for vectorization, encoding various decisions take
4748/// to produce efficient output IR, including which branches, basic-blocks and
4749/// output IR instructions to generate, and their cost. VPlan holds a
4750/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4751/// VPBasicBlock.
4752class VPlan {
4753 friend class VPlanPrinter;
4754 friend class VPSlotTracker;
4755
4756 /// VPBasicBlock corresponding to the original preheader. Used to place
4757 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4758 /// rest of VPlan execution.
4759 /// When this VPlan is used for the epilogue vector loop, the entry will be
4760 /// replaced by a new entry block created during skeleton creation.
4761 VPBasicBlock *Entry;
4762
4763 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4764 VPIRBasicBlock *ScalarHeader;
4765
4766 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4767 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4768 /// e.g. if the scalar epilogue always executes.
4769 SmallVector<VPIRBasicBlock *, 2> ExitBlocks;
4770
4771 /// Holds the VFs applicable to this VPlan.
4772 SmallSetVector<ElementCount, 2> VFs;
4773
4774 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4775 /// any UF.
4776 SmallSetVector<unsigned, 2> UFs;
4777
4778 /// Holds the name of the VPlan, for printing.
4779 std::string Name;
4780
4781 /// Represents the trip count of the original loop, for folding
4782 /// the tail.
4783 VPValue *TripCount = nullptr;
4784
4785 /// Represents the backedge taken count of the original loop, for folding
4786 /// the tail. It equals TripCount - 1.
4787 VPSymbolicValue *BackedgeTakenCount = nullptr;
4788
4789 /// Represents the vector trip count.
4790 VPSymbolicValue VectorTripCount;
4791
4792 /// Represents the vectorization factor of the loop.
4793 VPSymbolicValue VF;
4794
4795 /// Represents the unroll factor of the loop.
4796 VPSymbolicValue UF;
4797
4798 /// Represents the loop-invariant VF * UF of the vector loop region.
4799 VPSymbolicValue VFxUF;
4800
4801 /// Contains all the external definitions created for this VPlan, as a mapping
4802 /// from IR Values to VPIRValues.
4803 SmallMapVector<Value *, VPIRValue *, 16> LiveIns;
4804
4805 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4806 /// VPlan is destroyed.
4807 SmallVector<VPBlockBase *> CreatedBlocks;
4808
4809 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4810 /// wrapping the original header of the scalar loop. The vector loop will have
4811 /// index type \p IdxTy.
4812 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4813 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4814 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4815 Entry->setPlan(this);
4816 assert(ScalarHeader->getNumSuccessors() == 0 &&
4817 "scalar header must be a leaf node");
4818 }
4819
4820public:
4821 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4822 /// original preheader and scalar header of \p L, to be used as entry and
4823 /// scalar header blocks of the new VPlan. The vector loop will have index
4824 /// type \p IdxTy.
4825 VPlan(Loop *L, Type *IdxTy);
4826
4827 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4828 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4829 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4830 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4831 setEntry(createVPBasicBlock(Name: "preheader"));
4832 ScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderBB);
4833 }
4834
4835 LLVM_ABI_FOR_TEST ~VPlan();
4836
4837 void setEntry(VPBasicBlock *VPBB) {
4838 Entry = VPBB;
4839 VPBB->setPlan(this);
4840 }
4841
4842 /// Generate the IR code for this VPlan.
4843 void execute(VPTransformState *State);
4844
4845 /// Return the cost of this plan.
4846 InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
4847
4848 VPBasicBlock *getEntry() { return Entry; }
4849 const VPBasicBlock *getEntry() const { return Entry; }
4850
4851 /// Returns the preheader of the vector loop region, if one exists, or null
4852 /// otherwise.
4853 VPBasicBlock *getVectorPreheader() const {
4854 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4855 return VectorRegion
4856 ? cast<VPBasicBlock>(Val: VectorRegion->getSinglePredecessor())
4857 : nullptr;
4858 }
4859
4860 /// Returns the VPRegionBlock of the vector loop.
4861 LLVM_ABI_FOR_TEST VPRegionBlock *getVectorLoopRegion();
4862 LLVM_ABI_FOR_TEST const VPRegionBlock *getVectorLoopRegion() const;
4863
4864 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4865 /// loop region contains a nested loop region.
4866 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4867
4868 /// Returns the 'middle' block of the plan, that is the block that selects
4869 /// whether to execute the scalar tail loop or the exit block from the loop
4870 /// latch. If there is an early exit from the vector loop, the middle block
4871 /// conceptully has the early exit block as third successor, split accross 2
4872 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4873 /// tail loop or the exit block. If the scalar tail loop or exit block are
4874 /// known to always execute, the middle block may branch directly to that
4875 /// block. This function cannot be called once the vector loop region has been
4876 /// removed.
4877 VPBasicBlock *getMiddleBlock() {
4878 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4879 assert(
4880 LoopRegion &&
4881 "cannot call the function after vector loop region has been removed");
4882 // The middle block is always the last successor of the region.
4883 return cast<VPBasicBlock>(Val: LoopRegion->getSuccessors().back());
4884 }
4885
4886 const VPBasicBlock *getMiddleBlock() const {
4887 return const_cast<VPlan *>(this)->getMiddleBlock();
4888 }
4889
4890 /// Return the VPBasicBlock for the preheader of the scalar loop.
4891 VPBasicBlock *getScalarPreheader() const {
4892 return dyn_cast_or_null<VPBasicBlock>(
4893 Val: getScalarHeader()->getSinglePredecessor());
4894 }
4895
4896 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4897 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4898
4899 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4900 /// the original scalar loop.
4901 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4902
4903 /// Returns true if \p VPBB is an exit block.
4904 bool isExitBlock(VPBlockBase *VPBB);
4905
4906 /// The trip count of the original loop.
4907 VPValue *getTripCount() const {
4908 assert(TripCount && "trip count needs to be set before accessing it");
4909 return TripCount;
4910 }
4911
4912 /// Set the trip count assuming it is currently null; if it is not - use
4913 /// resetTripCount().
4914 void setTripCount(VPValue *NewTripCount) {
4915 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4916 TripCount = NewTripCount;
4917 }
4918
4919 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4920 /// the original trip count have been replaced.
4921 void resetTripCount(VPValue *NewTripCount) {
4922 assert(TripCount && NewTripCount && TripCount->user_empty() &&
4923 "TripCount must be set when resetting");
4924 TripCount = NewTripCount;
4925 }
4926
4927 /// The backedge taken count of the original loop.
4928 VPValue *getOrCreateBackedgeTakenCount() {
4929 // BTC shares the canonical IV type with VectorTripCount.
4930 if (!BackedgeTakenCount)
4931 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4932 return BackedgeTakenCount;
4933 }
4934 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4935
4936 /// The vector trip count.
4937 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4938
4939 /// Returns the VF of the vector loop region.
4940 VPSymbolicValue &getVF() { return VF; };
4941 const VPSymbolicValue &getVF() const { return VF; };
4942
4943 /// Returns the UF of the vector loop region.
4944 VPSymbolicValue &getUF() { return UF; };
4945
4946 /// Returns VF * UF of the vector loop region.
4947 VPSymbolicValue &getVFxUF() { return VFxUF; }
4948
4949 LLVMContext &getContext() const {
4950 return getScalarHeader()->getIRBasicBlock()->getContext();
4951 }
4952
4953 const DataLayout &getDataLayout() const {
4954 return getScalarHeader()->getIRBasicBlock()->getDataLayout();
4955 }
4956
4957 void addVF(ElementCount VF) { VFs.insert(X: VF); }
4958
4959 void setVF(ElementCount VF) {
4960 assert(hasVF(VF) && "Cannot set VF not already in plan");
4961 VFs.clear();
4962 VFs.insert(X: VF);
4963 }
4964
4965 /// Remove \p VF from the plan.
4966 void removeVF(ElementCount VF) {
4967 assert(hasVF(VF) && "tried to remove VF not present in plan");
4968 VFs.remove(X: VF);
4969 }
4970
4971 bool hasVF(ElementCount VF) const { return VFs.count(key: VF); }
4972 bool hasScalableVF() const {
4973 return any_of(Range: VFs, P: [](ElementCount VF) { return VF.isScalable(); });
4974 }
4975
4976 /// Returns an iterator range over all VFs of the plan.
4977 iterator_range<SmallSetVector<ElementCount, 2>::iterator>
4978 vectorFactors() const {
4979 return VFs;
4980 }
4981
4982 /// Returns the single VF of the plan, asserting that the plan has exactly
4983 /// one VF.
4984 ElementCount getSingleVF() const {
4985 assert(VFs.size() == 1 && "expected plan with single VF");
4986 return VFs[0];
4987 }
4988
4989 bool hasScalarVFOnly() const {
4990 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4991 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4992 "Plan with scalar VF should only have a single VF");
4993 return HasScalarVFOnly;
4994 }
4995
4996 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(key: UF); }
4997
4998 /// Returns the concrete UF of the plan, after unrolling.
4999 unsigned getConcreteUF() const {
5000 assert(UFs.size() == 1 && "Expected a single UF");
5001 return UFs[0];
5002 }
5003
5004 void setUF(unsigned UF) {
5005 assert(hasUF(UF) && "Cannot set the UF not already in plan");
5006 UFs.clear();
5007 UFs.insert(X: UF);
5008 }
5009
5010 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
5011 /// concrete UF.
5012 bool isUnrolled() const { return UFs.size() == 1; }
5013
5014 /// Return a string with the name of the plan and the applicable VFs and UFs.
5015 std::string getName() const;
5016
5017 void setName(const Twine &newName) { Name = newName.str(); }
5018
5019 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
5020 /// yet) for \p V.
5021 VPIRValue *getOrAddLiveIn(Value *V) {
5022 assert(V && "Trying to get or add the VPIRValue of a null Value");
5023 auto [It, Inserted] = LiveIns.try_emplace(Key: V);
5024 if (Inserted) {
5025 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
5026 It->second = new VPConstantInt(CI);
5027 else
5028 It->second = new VPIRValue(V);
5029 }
5030
5031 assert(isa<VPIRValue>(It->second) &&
5032 "Only VPIRValues should be in mapping");
5033 return It->second;
5034 }
5035 VPIRValue *getOrAddLiveIn(VPIRValue *V) {
5036 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
5037 return getOrAddLiveIn(V: V->getValue());
5038 }
5039
5040 /// Return a VPIRValue wrapping i1 true.
5041 VPIRValue *getTrue() { return getConstantInt(BitWidth: 1, Val: 1); }
5042
5043 /// Return a VPIRValue wrapping i1 false.
5044 VPIRValue *getFalse() { return getConstantInt(BitWidth: 1, Val: 0); }
5045
5046 /// Return a VPIRValue wrapping the null value of type \p Ty.
5047 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, Val: 0); }
5048
5049 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
5050 VPIRValue *getAllOnesValue(Type *Ty) {
5051 return getConstantInt(Val: APInt::getAllOnes(numBits: Ty->getIntegerBitWidth()));
5052 }
5053
5054 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5055 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5056 return getOrAddLiveIn(V: ConstantInt::get(Ty, V: Val, IsSigned));
5057 }
5058
5059 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5060 /// value.
5061 VPIRValue *getConstantInt(unsigned BitWidth, uint64_t Val,
5062 bool IsSigned = false) {
5063 return getConstantInt(Val: APInt(BitWidth, Val, IsSigned));
5064 }
5065
5066 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5067 VPIRValue *getConstantInt(const APInt &Val) {
5068 return getOrAddLiveIn(V: ConstantInt::get(Context&: getContext(), V: Val));
5069 }
5070
5071 /// Return a VPIRValue wrapping a poison value of type \p Ty.
5072 VPIRValue *getPoison(Type *Ty) {
5073 return getOrAddLiveIn(V: PoisonValue::get(T: Ty));
5074 }
5075
5076 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5077 /// otherwise.
5078 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(Key: V); }
5079
5080 /// Return the list of live-in VPValues available in the VPlan.
5081 auto getLiveIns() const { return LiveIns.values(); }
5082
5083#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5084 /// Print the live-ins of this VPlan to \p O.
5085 void printLiveIns(raw_ostream &O) const;
5086
5087 /// Print this VPlan to \p O.
5088 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5089
5090 /// Print this VPlan in DOT format to \p O.
5091 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5092
5093 /// Dump the plan to stderr (for debugging).
5094 LLVM_DUMP_METHOD void dump() const;
5095#endif
5096
5097 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5098 /// recipes to refer to the clones, and return it.
5099 LLVM_ABI_FOR_TEST VPlan *duplicate();
5100
5101 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5102 /// present. The returned block is owned by the VPlan and deleted once the
5103 /// VPlan is destroyed.
5104 VPBasicBlock *createVPBasicBlock(const Twine &Name,
5105 VPRecipeBase *Recipe = nullptr) {
5106 auto *VPB = new VPBasicBlock(Name, Recipe);
5107 CreatedBlocks.push_back(Elt: VPB);
5108 return VPB;
5109 }
5110
5111 /// Create a new loop region with a canonical IV using \p CanIVTy and
5112 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5113 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5114 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5115 VPRegionBlock *createLoopRegion(Type *CanIVTy, DebugLoc DL,
5116 const std::string &Name = "",
5117 VPBlockBase *Entry = nullptr,
5118 VPBlockBase *Exiting = nullptr) {
5119 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5120 CreatedBlocks.push_back(Elt: VPB);
5121 return VPB;
5122 }
5123
5124 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5125 /// returned block is owned by the VPlan and deleted once the VPlan is
5126 /// destroyed.
5127 VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
5128 const std::string &Name = "") {
5129 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5130 CreatedBlocks.push_back(Elt: VPB);
5131 return VPB;
5132 }
5133
5134 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5135 /// VPIRInstructions wrapping the instructions in t\p IRBB. The returned
5136 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5137 VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB);
5138
5139 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5140 /// instructions in \p IRBB, except its terminator which is managed by the
5141 /// successors of the block in VPlan. The returned block is owned by the VPlan
5142 /// and deleted once the VPlan is destroyed.
5143 LLVM_ABI_FOR_TEST VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);
5144
5145 /// Returns true if the VPlan is based on a loop with an early exit. That is
5146 /// the case if the VPlan has either more than one exit block or a single exit
5147 /// block with multiple predecessors (one for the exit via the latch and one
5148 /// via the other early exit).
5149 bool hasEarlyExit() const {
5150 return count_if(Range: ExitBlocks,
5151 P: [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5152 1 ||
5153 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5154 }
5155
5156 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5157 /// if the middle block is a predecessor of the scalar preheader. Note that
5158 /// this relies on unneeded branches to the scalar tail loop being removed.
5159 bool hasScalarTail() const {
5160 auto *ScalarPH = getScalarPreheader();
5161 return ScalarPH &&
5162 is_contained(Range&: ScalarPH->getPredecessors(), Element: getMiddleBlock());
5163 }
5164
5165 /// The type of the canonical induction variable of the vector loop.
5166 Type *getIndexType() const { return VF.getType(); }
5167};
5168
5169#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5170inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5171 Plan.print(OS);
5172 return OS;
5173}
5174#endif
5175
5176} // end namespace llvm
5177
5178#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
5179