1 | //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file contains the declarations of the Vectorization Plan base classes: |
11 | /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual |
12 | /// VPBlockBase, together implementing a Hierarchical CFG; |
13 | /// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained |
14 | /// within VPBasicBlocks; |
15 | /// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that |
16 | /// also inherit from VPValue. |
17 | /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned |
18 | /// instruction; |
19 | /// 5. The VPlan class holding a candidate for vectorization; |
20 | /// These are documented in docs/VectorizationPlan.rst. |
21 | // |
22 | //===----------------------------------------------------------------------===// |
23 | |
24 | #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
25 | #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
26 | |
27 | #include "VPlanAnalysis.h" |
28 | #include "VPlanValue.h" |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/SmallBitVector.h" |
31 | #include "llvm/ADT/SmallPtrSet.h" |
32 | #include "llvm/ADT/SmallVector.h" |
33 | #include "llvm/ADT/Twine.h" |
34 | #include "llvm/ADT/ilist.h" |
35 | #include "llvm/ADT/ilist_node.h" |
36 | #include "llvm/Analysis/IVDescriptors.h" |
37 | #include "llvm/Analysis/VectorUtils.h" |
38 | #include "llvm/IR/DebugLoc.h" |
39 | #include "llvm/IR/FMF.h" |
40 | #include "llvm/IR/Operator.h" |
41 | #include "llvm/Support/InstructionCost.h" |
42 | #include <algorithm> |
43 | #include <cassert> |
44 | #include <cstddef> |
45 | #include <string> |
46 | |
47 | namespace llvm { |
48 | |
49 | class BasicBlock; |
50 | class DominatorTree; |
51 | class InnerLoopVectorizer; |
52 | class IRBuilderBase; |
53 | struct VPTransformState; |
54 | class raw_ostream; |
55 | class RecurrenceDescriptor; |
56 | class SCEV; |
57 | class Type; |
58 | class VPBasicBlock; |
59 | class VPBuilder; |
60 | class VPDominatorTree; |
61 | class VPRegionBlock; |
62 | class VPlan; |
63 | class VPLane; |
64 | class VPReplicateRecipe; |
65 | class VPlanSlp; |
66 | class Value; |
67 | class LoopVectorizationCostModel; |
68 | class LoopVersioning; |
69 | |
70 | struct VPCostContext; |
71 | |
72 | namespace Intrinsic { |
73 | typedef unsigned ID; |
74 | } |
75 | |
76 | using VPlanPtr = std::unique_ptr<VPlan>; |
77 | |
78 | /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. |
79 | /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. |
80 | class VPBlockBase { |
81 | friend class VPBlockUtils; |
82 | |
83 | const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). |
84 | |
85 | /// An optional name for the block. |
86 | std::string Name; |
87 | |
88 | /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if |
89 | /// it is a topmost VPBlockBase. |
90 | VPRegionBlock *Parent = nullptr; |
91 | |
92 | /// List of predecessor blocks. |
93 | SmallVector<VPBlockBase *, 1> Predecessors; |
94 | |
95 | /// List of successor blocks. |
96 | SmallVector<VPBlockBase *, 1> Successors; |
97 | |
98 | /// VPlan containing the block. Can only be set on the entry block of the |
99 | /// plan. |
100 | VPlan *Plan = nullptr; |
101 | |
102 | /// Add \p Successor as the last successor to this block. |
103 | void appendSuccessor(VPBlockBase *Successor) { |
104 | assert(Successor && "Cannot add nullptr successor!" ); |
105 | Successors.push_back(Elt: Successor); |
106 | } |
107 | |
108 | /// Add \p Predecessor as the last predecessor to this block. |
109 | void appendPredecessor(VPBlockBase *Predecessor) { |
110 | assert(Predecessor && "Cannot add nullptr predecessor!" ); |
111 | Predecessors.push_back(Elt: Predecessor); |
112 | } |
113 | |
114 | /// Remove \p Predecessor from the predecessors of this block. |
115 | void removePredecessor(VPBlockBase *Predecessor) { |
116 | auto Pos = find(Range&: Predecessors, Val: Predecessor); |
117 | assert(Pos && "Predecessor does not exist" ); |
118 | Predecessors.erase(CI: Pos); |
119 | } |
120 | |
121 | /// Remove \p Successor from the successors of this block. |
122 | void removeSuccessor(VPBlockBase *Successor) { |
123 | auto Pos = find(Range&: Successors, Val: Successor); |
124 | assert(Pos && "Successor does not exist" ); |
125 | Successors.erase(CI: Pos); |
126 | } |
127 | |
128 | /// This function replaces one predecessor with another, useful when |
129 | /// trying to replace an old block in the CFG with a new one. |
130 | void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) { |
131 | auto I = find(Range&: Predecessors, Val: Old); |
132 | assert(I != Predecessors.end()); |
133 | assert(Old->getParent() == New->getParent() && |
134 | "replaced predecessor must have the same parent" ); |
135 | *I = New; |
136 | } |
137 | |
138 | /// This function replaces one successor with another, useful when |
139 | /// trying to replace an old block in the CFG with a new one. |
140 | void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) { |
141 | auto I = find(Range&: Successors, Val: Old); |
142 | assert(I != Successors.end()); |
143 | assert(Old->getParent() == New->getParent() && |
144 | "replaced successor must have the same parent" ); |
145 | *I = New; |
146 | } |
147 | |
148 | protected: |
149 | VPBlockBase(const unsigned char SC, const std::string &N) |
150 | : SubclassID(SC), Name(N) {} |
151 | |
152 | public: |
153 | /// An enumeration for keeping track of the concrete subclass of VPBlockBase |
154 | /// that are actually instantiated. Values of this enumeration are kept in the |
155 | /// SubclassID field of the VPBlockBase objects. They are used for concrete |
156 | /// type identification. |
157 | using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC }; |
158 | |
159 | using VPBlocksTy = SmallVectorImpl<VPBlockBase *>; |
160 | |
161 | virtual ~VPBlockBase() = default; |
162 | |
163 | const std::string &getName() const { return Name; } |
164 | |
165 | void setName(const Twine &newName) { Name = newName.str(); } |
166 | |
167 | /// \return an ID for the concrete type of this object. |
168 | /// This is used to implement the classof checks. This should not be used |
169 | /// for any other purpose, as the values may change as LLVM evolves. |
170 | unsigned getVPBlockID() const { return SubclassID; } |
171 | |
172 | VPRegionBlock *getParent() { return Parent; } |
173 | const VPRegionBlock *getParent() const { return Parent; } |
174 | |
175 | /// \return A pointer to the plan containing the current block. |
176 | VPlan *getPlan(); |
177 | const VPlan *getPlan() const; |
178 | |
179 | /// Sets the pointer of the plan containing the block. The block must be the |
180 | /// entry block into the VPlan. |
181 | void setPlan(VPlan *ParentPlan); |
182 | |
183 | void setParent(VPRegionBlock *P) { Parent = P; } |
184 | |
185 | /// \return the VPBasicBlock that is the entry of this VPBlockBase, |
186 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this |
187 | /// VPBlockBase is a VPBasicBlock, it is returned. |
188 | const VPBasicBlock *getEntryBasicBlock() const; |
189 | VPBasicBlock *getEntryBasicBlock(); |
190 | |
191 | /// \return the VPBasicBlock that is the exiting this VPBlockBase, |
192 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this |
193 | /// VPBlockBase is a VPBasicBlock, it is returned. |
194 | const VPBasicBlock *getExitingBasicBlock() const; |
195 | VPBasicBlock *getExitingBasicBlock(); |
196 | |
197 | const VPBlocksTy &getSuccessors() const { return Successors; } |
198 | VPBlocksTy &getSuccessors() { return Successors; } |
199 | |
200 | iterator_range<VPBlockBase **> successors() { return Successors; } |
201 | iterator_range<VPBlockBase **> predecessors() { return Predecessors; } |
202 | |
203 | const VPBlocksTy &getPredecessors() const { return Predecessors; } |
204 | VPBlocksTy &getPredecessors() { return Predecessors; } |
205 | |
206 | /// \return the successor of this VPBlockBase if it has a single successor. |
207 | /// Otherwise return a null pointer. |
208 | VPBlockBase *getSingleSuccessor() const { |
209 | return (Successors.size() == 1 ? *Successors.begin() : nullptr); |
210 | } |
211 | |
212 | /// \return the predecessor of this VPBlockBase if it has a single |
213 | /// predecessor. Otherwise return a null pointer. |
214 | VPBlockBase *getSinglePredecessor() const { |
215 | return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); |
216 | } |
217 | |
218 | size_t getNumSuccessors() const { return Successors.size(); } |
219 | size_t getNumPredecessors() const { return Predecessors.size(); } |
220 | |
221 | /// An Enclosing Block of a block B is any block containing B, including B |
222 | /// itself. \return the closest enclosing block starting from "this", which |
223 | /// has successors. \return the root enclosing block if all enclosing blocks |
224 | /// have no successors. |
225 | VPBlockBase *getEnclosingBlockWithSuccessors(); |
226 | |
227 | /// \return the closest enclosing block starting from "this", which has |
228 | /// predecessors. \return the root enclosing block if all enclosing blocks |
229 | /// have no predecessors. |
230 | VPBlockBase *getEnclosingBlockWithPredecessors(); |
231 | |
232 | /// \return the successors either attached directly to this VPBlockBase or, if |
233 | /// this VPBlockBase is the exit block of a VPRegionBlock and has no |
234 | /// successors of its own, search recursively for the first enclosing |
235 | /// VPRegionBlock that has successors and return them. If no such |
236 | /// VPRegionBlock exists, return the (empty) successors of the topmost |
237 | /// VPBlockBase reached. |
238 | const VPBlocksTy &getHierarchicalSuccessors() { |
239 | return getEnclosingBlockWithSuccessors()->getSuccessors(); |
240 | } |
241 | |
242 | /// \return the hierarchical successor of this VPBlockBase if it has a single |
243 | /// hierarchical successor. Otherwise return a null pointer. |
244 | VPBlockBase *getSingleHierarchicalSuccessor() { |
245 | return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); |
246 | } |
247 | |
248 | /// \return the predecessors either attached directly to this VPBlockBase or, |
249 | /// if this VPBlockBase is the entry block of a VPRegionBlock and has no |
250 | /// predecessors of its own, search recursively for the first enclosing |
251 | /// VPRegionBlock that has predecessors and return them. If no such |
252 | /// VPRegionBlock exists, return the (empty) predecessors of the topmost |
253 | /// VPBlockBase reached. |
254 | const VPBlocksTy &getHierarchicalPredecessors() { |
255 | return getEnclosingBlockWithPredecessors()->getPredecessors(); |
256 | } |
257 | |
258 | /// \return the hierarchical predecessor of this VPBlockBase if it has a |
259 | /// single hierarchical predecessor. Otherwise return a null pointer. |
260 | VPBlockBase *getSingleHierarchicalPredecessor() { |
261 | return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); |
262 | } |
263 | |
264 | /// Set a given VPBlockBase \p Successor as the single successor of this |
265 | /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. |
266 | /// This VPBlockBase must have no successors. |
267 | void setOneSuccessor(VPBlockBase *Successor) { |
268 | assert(Successors.empty() && "Setting one successor when others exist." ); |
269 | assert(Successor->getParent() == getParent() && |
270 | "connected blocks must have the same parent" ); |
271 | appendSuccessor(Successor); |
272 | } |
273 | |
274 | /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two |
275 | /// successors of this VPBlockBase. This VPBlockBase is not added as |
276 | /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no |
277 | /// successors. |
278 | void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { |
279 | assert(Successors.empty() && "Setting two successors when others exist." ); |
280 | appendSuccessor(Successor: IfTrue); |
281 | appendSuccessor(Successor: IfFalse); |
282 | } |
283 | |
284 | /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. |
285 | /// This VPBlockBase must have no predecessors. This VPBlockBase is not added |
286 | /// as successor of any VPBasicBlock in \p NewPreds. |
287 | void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) { |
288 | assert(Predecessors.empty() && "Block predecessors already set." ); |
289 | for (auto *Pred : NewPreds) |
290 | appendPredecessor(Predecessor: Pred); |
291 | } |
292 | |
293 | /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase. |
294 | /// This VPBlockBase must have no successors. This VPBlockBase is not added |
295 | /// as predecessor of any VPBasicBlock in \p NewSuccs. |
296 | void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) { |
297 | assert(Successors.empty() && "Block successors already set." ); |
298 | for (auto *Succ : NewSuccs) |
299 | appendSuccessor(Successor: Succ); |
300 | } |
301 | |
302 | /// Remove all the predecessor of this block. |
303 | void clearPredecessors() { Predecessors.clear(); } |
304 | |
305 | /// Remove all the successors of this block. |
306 | void clearSuccessors() { Successors.clear(); } |
307 | |
308 | /// Swap predecessors of the block. The block must have exactly 2 |
309 | /// predecessors. |
310 | void swapPredecessors() { |
311 | assert(Predecessors.size() == 2 && "must have 2 predecessors to swap" ); |
312 | std::swap(a&: Predecessors[0], b&: Predecessors[1]); |
313 | } |
314 | |
315 | /// Swap successors of the block. The block must have exactly 2 successors. |
316 | // TODO: This should be part of introducing conditional branch recipes rather |
317 | // than being independent. |
318 | void swapSuccessors() { |
319 | assert(Successors.size() == 2 && "must have 2 successors to swap" ); |
320 | std::swap(a&: Successors[0], b&: Successors[1]); |
321 | } |
322 | |
323 | /// Returns the index for \p Pred in the blocks predecessors list. |
324 | unsigned getIndexForPredecessor(const VPBlockBase *Pred) const { |
325 | assert(count(Predecessors, Pred) == 1 && |
326 | "must have Pred exactly once in Predecessors" ); |
327 | return std::distance(first: Predecessors.begin(), last: find(Range: Predecessors, Val: Pred)); |
328 | } |
329 | |
330 | /// Returns the index for \p Succ in the blocks successor list. |
331 | unsigned getIndexForSuccessor(const VPBlockBase *Succ) const { |
332 | assert(count(Successors, Succ) == 1 && |
333 | "must have Succ exactly once in Successors" ); |
334 | return std::distance(first: Successors.begin(), last: find(Range: Successors, Val: Succ)); |
335 | } |
336 | |
337 | /// The method which generates the output IR that correspond to this |
338 | /// VPBlockBase, thereby "executing" the VPlan. |
339 | virtual void execute(VPTransformState *State) = 0; |
340 | |
341 | /// Return the cost of the block. |
342 | virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; |
343 | |
344 | /// Return true if it is legal to hoist instructions into this block. |
345 | bool isLegalToHoistInto() { |
346 | // There are currently no constraints that prevent an instruction to be |
347 | // hoisted into a VPBlockBase. |
348 | return true; |
349 | } |
350 | |
351 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
352 | void printAsOperand(raw_ostream &OS, bool PrintType = false) const { |
353 | OS << getName(); |
354 | } |
355 | |
356 | /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines |
357 | /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using |
358 | /// consequtive numbers. |
359 | /// |
360 | /// Note that the numbering is applied to the whole VPlan, so printing |
361 | /// individual blocks is consistent with the whole VPlan printing. |
362 | virtual void print(raw_ostream &O, const Twine &Indent, |
363 | VPSlotTracker &SlotTracker) const = 0; |
364 | |
365 | /// Print plain-text dump of this VPlan to \p O. |
366 | void print(raw_ostream &O) const; |
367 | |
368 | /// Print the successors of this block to \p O, prefixing all lines with \p |
369 | /// Indent. |
370 | void printSuccessors(raw_ostream &O, const Twine &Indent) const; |
371 | |
372 | /// Dump this VPBlockBase to dbgs(). |
373 | LLVM_DUMP_METHOD void dump() const { print(dbgs()); } |
374 | #endif |
375 | |
376 | /// Clone the current block and it's recipes without updating the operands of |
377 | /// the cloned recipes, including all blocks in the single-entry single-exit |
378 | /// region for VPRegionBlocks. |
379 | virtual VPBlockBase *clone() = 0; |
380 | }; |
381 | |
382 | /// VPRecipeBase is a base class modeling a sequence of one or more output IR |
383 | /// instructions. VPRecipeBase owns the VPValues it defines through VPDef |
384 | /// and is responsible for deleting its defined values. Single-value |
385 | /// recipes must inherit from VPSingleDef instead of inheriting from both |
386 | /// VPRecipeBase and VPValue separately. |
387 | class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, |
388 | public VPDef, |
389 | public VPUser { |
390 | friend VPBasicBlock; |
391 | friend class VPBlockUtils; |
392 | |
393 | /// Each VPRecipe belongs to a single VPBasicBlock. |
394 | VPBasicBlock *Parent = nullptr; |
395 | |
396 | /// The debug location for the recipe. |
397 | DebugLoc DL; |
398 | |
399 | public: |
400 | VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands, |
401 | DebugLoc DL = {}) |
402 | : VPDef(SC), VPUser(Operands), DL(DL) {} |
403 | |
404 | virtual ~VPRecipeBase() = default; |
405 | |
406 | /// Clone the current recipe. |
407 | virtual VPRecipeBase *clone() = 0; |
408 | |
409 | /// \return the VPBasicBlock which this VPRecipe belongs to. |
410 | VPBasicBlock *getParent() { return Parent; } |
411 | const VPBasicBlock *getParent() const { return Parent; } |
412 | |
413 | /// The method which generates the output IR instructions that correspond to |
414 | /// this VPRecipe, thereby "executing" the VPlan. |
415 | virtual void execute(VPTransformState &State) = 0; |
416 | |
417 | /// Return the cost of this recipe, taking into account if the cost |
418 | /// computation should be skipped and the ForceTargetInstructionCost flag. |
419 | /// Also takes care of printing the cost for debugging. |
420 | InstructionCost cost(ElementCount VF, VPCostContext &Ctx); |
421 | |
422 | /// Insert an unlinked recipe into a basic block immediately before |
423 | /// the specified recipe. |
424 | void insertBefore(VPRecipeBase *InsertPos); |
425 | /// Insert an unlinked recipe into \p BB immediately before the insertion |
426 | /// point \p IP; |
427 | void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP); |
428 | |
429 | /// Insert an unlinked Recipe into a basic block immediately after |
430 | /// the specified Recipe. |
431 | void insertAfter(VPRecipeBase *InsertPos); |
432 | |
433 | /// Unlink this recipe from its current VPBasicBlock and insert it into |
434 | /// the VPBasicBlock that MovePos lives in, right after MovePos. |
435 | void moveAfter(VPRecipeBase *MovePos); |
436 | |
437 | /// Unlink this recipe and insert into BB before I. |
438 | /// |
439 | /// \pre I is a valid iterator into BB. |
440 | void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I); |
441 | |
442 | /// This method unlinks 'this' from the containing basic block, but does not |
443 | /// delete it. |
444 | void removeFromParent(); |
445 | |
446 | /// This method unlinks 'this' from the containing basic block and deletes it. |
447 | /// |
448 | /// \returns an iterator pointing to the element after the erased one |
449 | iplist<VPRecipeBase>::iterator eraseFromParent(); |
450 | |
451 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
452 | static inline bool classof(const VPDef *D) { |
453 | // All VPDefs are also VPRecipeBases. |
454 | return true; |
455 | } |
456 | |
457 | static inline bool classof(const VPUser *U) { return true; } |
458 | |
459 | /// Returns true if the recipe may have side-effects. |
460 | bool mayHaveSideEffects() const; |
461 | |
462 | /// Returns true for PHI-like recipes. |
463 | bool isPhi() const; |
464 | |
465 | /// Returns true if the recipe may read from memory. |
466 | bool mayReadFromMemory() const; |
467 | |
468 | /// Returns true if the recipe may write to memory. |
469 | bool mayWriteToMemory() const; |
470 | |
471 | /// Returns true if the recipe may read from or write to memory. |
472 | bool mayReadOrWriteMemory() const { |
473 | return mayReadFromMemory() || mayWriteToMemory(); |
474 | } |
475 | |
476 | /// Returns the debug location of the recipe. |
477 | DebugLoc getDebugLoc() const { return DL; } |
478 | |
479 | /// Return true if the recipe is a scalar cast. |
480 | bool isScalarCast() const; |
481 | |
482 | /// Set the recipe's debug location to \p NewDL. |
483 | void setDebugLoc(DebugLoc NewDL) { DL = NewDL; } |
484 | |
485 | protected: |
486 | /// Compute the cost of this recipe either using a recipe's specialized |
487 | /// implementation or using the legacy cost model and the underlying |
488 | /// instructions. |
489 | virtual InstructionCost computeCost(ElementCount VF, |
490 | VPCostContext &Ctx) const; |
491 | }; |
492 | |
493 | // Helper macro to define common classof implementations for recipes. |
494 | #define VP_CLASSOF_IMPL(VPDefID) \ |
495 | static inline bool classof(const VPDef *D) { \ |
496 | return D->getVPDefID() == VPDefID; \ |
497 | } \ |
498 | static inline bool classof(const VPValue *V) { \ |
499 | auto *R = V->getDefiningRecipe(); \ |
500 | return R && R->getVPDefID() == VPDefID; \ |
501 | } \ |
502 | static inline bool classof(const VPUser *U) { \ |
503 | auto *R = dyn_cast<VPRecipeBase>(U); \ |
504 | return R && R->getVPDefID() == VPDefID; \ |
505 | } \ |
506 | static inline bool classof(const VPRecipeBase *R) { \ |
507 | return R->getVPDefID() == VPDefID; \ |
508 | } \ |
509 | static inline bool classof(const VPSingleDefRecipe *R) { \ |
510 | return R->getVPDefID() == VPDefID; \ |
511 | } |
512 | |
513 | /// VPSingleDef is a base class for recipes for modeling a sequence of one or |
514 | /// more output IR that define a single result VPValue. |
515 | /// Note that VPRecipeBase must be inherited from before VPValue. |
516 | class VPSingleDefRecipe : public VPRecipeBase, public VPValue { |
517 | public: |
518 | VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, |
519 | DebugLoc DL = {}) |
520 | : VPRecipeBase(SC, Operands, DL), VPValue(this) {} |
521 | |
522 | VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands, |
523 | Value *UV, DebugLoc DL = {}) |
524 | : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {} |
525 | |
526 | static inline bool classof(const VPRecipeBase *R) { |
527 | switch (R->getVPDefID()) { |
528 | case VPRecipeBase::VPDerivedIVSC: |
529 | case VPRecipeBase::VPEVLBasedIVPHISC: |
530 | case VPRecipeBase::VPExpandSCEVSC: |
531 | case VPRecipeBase::VPExpressionSC: |
532 | case VPRecipeBase::VPInstructionSC: |
533 | case VPRecipeBase::VPReductionEVLSC: |
534 | case VPRecipeBase::VPReductionSC: |
535 | case VPRecipeBase::VPReplicateSC: |
536 | case VPRecipeBase::VPScalarIVStepsSC: |
537 | case VPRecipeBase::VPVectorPointerSC: |
538 | case VPRecipeBase::VPVectorEndPointerSC: |
539 | case VPRecipeBase::VPWidenCallSC: |
540 | case VPRecipeBase::VPWidenCanonicalIVSC: |
541 | case VPRecipeBase::VPWidenCastSC: |
542 | case VPRecipeBase::VPWidenGEPSC: |
543 | case VPRecipeBase::VPWidenIntrinsicSC: |
544 | case VPRecipeBase::VPWidenSC: |
545 | case VPRecipeBase::VPWidenSelectSC: |
546 | case VPRecipeBase::VPBlendSC: |
547 | case VPRecipeBase::VPPredInstPHISC: |
548 | case VPRecipeBase::VPCanonicalIVPHISC: |
549 | case VPRecipeBase::VPActiveLaneMaskPHISC: |
550 | case VPRecipeBase::VPFirstOrderRecurrencePHISC: |
551 | case VPRecipeBase::VPWidenPHISC: |
552 | case VPRecipeBase::VPWidenIntOrFpInductionSC: |
553 | case VPRecipeBase::VPWidenPointerInductionSC: |
554 | case VPRecipeBase::VPReductionPHISC: |
555 | case VPRecipeBase::VPPartialReductionSC: |
556 | return true; |
557 | case VPRecipeBase::VPBranchOnMaskSC: |
558 | case VPRecipeBase::VPInterleaveSC: |
559 | case VPRecipeBase::VPIRInstructionSC: |
560 | case VPRecipeBase::VPWidenLoadEVLSC: |
561 | case VPRecipeBase::VPWidenLoadSC: |
562 | case VPRecipeBase::VPWidenStoreEVLSC: |
563 | case VPRecipeBase::VPWidenStoreSC: |
564 | case VPRecipeBase::VPHistogramSC: |
565 | // TODO: Widened stores don't define a value, but widened loads do. Split |
566 | // the recipes to be able to make widened loads VPSingleDefRecipes. |
567 | return false; |
568 | } |
569 | llvm_unreachable("Unhandled VPDefID" ); |
570 | } |
571 | |
572 | static inline bool classof(const VPUser *U) { |
573 | auto *R = dyn_cast<VPRecipeBase>(Val: U); |
574 | return R && classof(R); |
575 | } |
576 | |
577 | virtual VPSingleDefRecipe *clone() override = 0; |
578 | |
579 | /// Returns the underlying instruction. |
580 | Instruction *getUnderlyingInstr() { |
581 | return cast<Instruction>(Val: getUnderlyingValue()); |
582 | } |
583 | const Instruction *getUnderlyingInstr() const { |
584 | return cast<Instruction>(Val: getUnderlyingValue()); |
585 | } |
586 | |
587 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
588 | /// Print this VPSingleDefRecipe to dbgs() (for debugging). |
589 | LLVM_DUMP_METHOD void dump() const; |
590 | #endif |
591 | }; |
592 | |
593 | /// Class to record and manage LLVM IR flags. |
594 | class VPIRFlags { |
595 | enum class OperationType : unsigned char { |
596 | Cmp, |
597 | OverflowingBinOp, |
598 | DisjointOp, |
599 | PossiblyExactOp, |
600 | GEPOp, |
601 | FPMathOp, |
602 | NonNegOp, |
603 | Other |
604 | }; |
605 | |
606 | public: |
607 | struct WrapFlagsTy { |
608 | char HasNUW : 1; |
609 | char HasNSW : 1; |
610 | |
611 | WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {} |
612 | }; |
613 | |
614 | struct DisjointFlagsTy { |
615 | char IsDisjoint : 1; |
616 | DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {} |
617 | }; |
618 | |
619 | struct NonNegFlagsTy { |
620 | char NonNeg : 1; |
621 | NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {} |
622 | }; |
623 | |
624 | private: |
625 | struct ExactFlagsTy { |
626 | char IsExact : 1; |
627 | }; |
628 | struct FastMathFlagsTy { |
629 | char AllowReassoc : 1; |
630 | char NoNaNs : 1; |
631 | char NoInfs : 1; |
632 | char NoSignedZeros : 1; |
633 | char AllowReciprocal : 1; |
634 | char AllowContract : 1; |
635 | char ApproxFunc : 1; |
636 | |
637 | FastMathFlagsTy(const FastMathFlags &FMF); |
638 | }; |
639 | |
640 | OperationType OpType; |
641 | |
642 | union { |
643 | CmpInst::Predicate CmpPredicate; |
644 | WrapFlagsTy WrapFlags; |
645 | DisjointFlagsTy DisjointFlags; |
646 | ExactFlagsTy ExactFlags; |
647 | GEPNoWrapFlags GEPFlags; |
648 | NonNegFlagsTy NonNegFlags; |
649 | FastMathFlagsTy FMFs; |
650 | unsigned AllFlags; |
651 | }; |
652 | |
653 | public: |
654 | VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {} |
655 | |
656 | VPIRFlags(Instruction &I) { |
657 | if (auto *Op = dyn_cast<CmpInst>(Val: &I)) { |
658 | OpType = OperationType::Cmp; |
659 | CmpPredicate = Op->getPredicate(); |
660 | } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(Val: &I)) { |
661 | OpType = OperationType::DisjointOp; |
662 | DisjointFlags.IsDisjoint = Op->isDisjoint(); |
663 | } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(Val: &I)) { |
664 | OpType = OperationType::OverflowingBinOp; |
665 | WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()}; |
666 | } else if (auto *Op = dyn_cast<PossiblyExactOperator>(Val: &I)) { |
667 | OpType = OperationType::PossiblyExactOp; |
668 | ExactFlags.IsExact = Op->isExact(); |
669 | } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) { |
670 | OpType = OperationType::GEPOp; |
671 | GEPFlags = GEP->getNoWrapFlags(); |
672 | } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) { |
673 | OpType = OperationType::NonNegOp; |
674 | NonNegFlags.NonNeg = PNNI->hasNonNeg(); |
675 | } else if (auto *Op = dyn_cast<FPMathOperator>(Val: &I)) { |
676 | OpType = OperationType::FPMathOp; |
677 | FMFs = Op->getFastMathFlags(); |
678 | } else { |
679 | OpType = OperationType::Other; |
680 | AllFlags = 0; |
681 | } |
682 | } |
683 | |
684 | VPIRFlags(CmpInst::Predicate Pred) |
685 | : OpType(OperationType::Cmp), CmpPredicate(Pred) {} |
686 | |
687 | VPIRFlags(WrapFlagsTy WrapFlags) |
688 | : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {} |
689 | |
690 | VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {} |
691 | |
692 | VPIRFlags(DisjointFlagsTy DisjointFlags) |
693 | : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {} |
694 | |
695 | VPIRFlags(NonNegFlagsTy NonNegFlags) |
696 | : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {} |
697 | |
698 | VPIRFlags(GEPNoWrapFlags GEPFlags) |
699 | : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {} |
700 | |
701 | public: |
702 | void transferFlags(VPIRFlags &Other) { |
703 | OpType = Other.OpType; |
704 | AllFlags = Other.AllFlags; |
705 | } |
706 | |
707 | /// Drop all poison-generating flags. |
708 | void dropPoisonGeneratingFlags() { |
709 | // NOTE: This needs to be kept in-sync with |
710 | // Instruction::dropPoisonGeneratingFlags. |
711 | switch (OpType) { |
712 | case OperationType::OverflowingBinOp: |
713 | WrapFlags.HasNUW = false; |
714 | WrapFlags.HasNSW = false; |
715 | break; |
716 | case OperationType::DisjointOp: |
717 | DisjointFlags.IsDisjoint = false; |
718 | break; |
719 | case OperationType::PossiblyExactOp: |
720 | ExactFlags.IsExact = false; |
721 | break; |
722 | case OperationType::GEPOp: |
723 | GEPFlags = GEPNoWrapFlags::none(); |
724 | break; |
725 | case OperationType::FPMathOp: |
726 | FMFs.NoNaNs = false; |
727 | FMFs.NoInfs = false; |
728 | break; |
729 | case OperationType::NonNegOp: |
730 | NonNegFlags.NonNeg = false; |
731 | break; |
732 | case OperationType::Cmp: |
733 | case OperationType::Other: |
734 | break; |
735 | } |
736 | } |
737 | |
738 | /// Apply the IR flags to \p I. |
739 | void applyFlags(Instruction &I) const { |
740 | switch (OpType) { |
741 | case OperationType::OverflowingBinOp: |
742 | I.setHasNoUnsignedWrap(WrapFlags.HasNUW); |
743 | I.setHasNoSignedWrap(WrapFlags.HasNSW); |
744 | break; |
745 | case OperationType::DisjointOp: |
746 | cast<PossiblyDisjointInst>(Val: &I)->setIsDisjoint(DisjointFlags.IsDisjoint); |
747 | break; |
748 | case OperationType::PossiblyExactOp: |
749 | I.setIsExact(ExactFlags.IsExact); |
750 | break; |
751 | case OperationType::GEPOp: |
752 | cast<GetElementPtrInst>(Val: &I)->setNoWrapFlags(GEPFlags); |
753 | break; |
754 | case OperationType::FPMathOp: |
755 | I.setHasAllowReassoc(FMFs.AllowReassoc); |
756 | I.setHasNoNaNs(FMFs.NoNaNs); |
757 | I.setHasNoInfs(FMFs.NoInfs); |
758 | I.setHasNoSignedZeros(FMFs.NoSignedZeros); |
759 | I.setHasAllowReciprocal(FMFs.AllowReciprocal); |
760 | I.setHasAllowContract(FMFs.AllowContract); |
761 | I.setHasApproxFunc(FMFs.ApproxFunc); |
762 | break; |
763 | case OperationType::NonNegOp: |
764 | I.setNonNeg(NonNegFlags.NonNeg); |
765 | break; |
766 | case OperationType::Cmp: |
767 | case OperationType::Other: |
768 | break; |
769 | } |
770 | } |
771 | |
772 | CmpInst::Predicate getPredicate() const { |
773 | assert(OpType == OperationType::Cmp && |
774 | "recipe doesn't have a compare predicate" ); |
775 | return CmpPredicate; |
776 | } |
777 | |
778 | void setPredicate(CmpInst::Predicate Pred) { |
779 | assert(OpType == OperationType::Cmp && |
780 | "recipe doesn't have a compare predicate" ); |
781 | CmpPredicate = Pred; |
782 | } |
783 | |
784 | GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; } |
785 | |
786 | /// Returns true if the recipe has fast-math flags. |
787 | bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; } |
788 | |
789 | FastMathFlags getFastMathFlags() const; |
790 | |
791 | /// Returns true if the recipe has non-negative flag. |
792 | bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; } |
793 | |
794 | bool isNonNeg() const { |
795 | assert(OpType == OperationType::NonNegOp && |
796 | "recipe doesn't have a NNEG flag" ); |
797 | return NonNegFlags.NonNeg; |
798 | } |
799 | |
800 | bool hasNoUnsignedWrap() const { |
801 | assert(OpType == OperationType::OverflowingBinOp && |
802 | "recipe doesn't have a NUW flag" ); |
803 | return WrapFlags.HasNUW; |
804 | } |
805 | |
806 | bool hasNoSignedWrap() const { |
807 | assert(OpType == OperationType::OverflowingBinOp && |
808 | "recipe doesn't have a NSW flag" ); |
809 | return WrapFlags.HasNSW; |
810 | } |
811 | |
812 | bool isDisjoint() const { |
813 | assert(OpType == OperationType::DisjointOp && |
814 | "recipe cannot have a disjoing flag" ); |
815 | return DisjointFlags.IsDisjoint; |
816 | } |
817 | |
818 | #if !defined(NDEBUG) |
819 | /// Returns true if the set flags are valid for \p Opcode. |
820 | bool flagsValidForOpcode(unsigned Opcode) const; |
821 | #endif |
822 | |
823 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
824 | void printFlags(raw_ostream &O) const; |
825 | #endif |
826 | }; |
827 | |
828 | /// A pure-virtual common base class for recipes defining a single VPValue and |
829 | /// using IR flags. |
830 | struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { |
831 | VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, |
832 | DebugLoc DL = {}) |
833 | : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {} |
834 | |
835 | VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, |
836 | Instruction &I) |
837 | : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {} |
838 | |
839 | VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, |
840 | const VPIRFlags &Flags, DebugLoc DL = {}) |
841 | : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {} |
842 | |
843 | static inline bool classof(const VPRecipeBase *R) { |
844 | return R->getVPDefID() == VPRecipeBase::VPInstructionSC || |
845 | R->getVPDefID() == VPRecipeBase::VPWidenSC || |
846 | R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || |
847 | R->getVPDefID() == VPRecipeBase::VPWidenCallSC || |
848 | R->getVPDefID() == VPRecipeBase::VPWidenCastSC || |
849 | R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC || |
850 | R->getVPDefID() == VPRecipeBase::VPWidenSelectSC || |
851 | R->getVPDefID() == VPRecipeBase::VPReductionSC || |
852 | R->getVPDefID() == VPRecipeBase::VPReductionEVLSC || |
853 | R->getVPDefID() == VPRecipeBase::VPReplicateSC || |
854 | R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC || |
855 | R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; |
856 | } |
857 | |
858 | static inline bool classof(const VPUser *U) { |
859 | auto *R = dyn_cast<VPRecipeBase>(Val: U); |
860 | return R && classof(R); |
861 | } |
862 | |
863 | static inline bool classof(const VPValue *V) { |
864 | auto *R = dyn_cast_or_null<VPRecipeBase>(Val: V->getDefiningRecipe()); |
865 | return R && classof(R); |
866 | } |
867 | |
868 | void execute(VPTransformState &State) override = 0; |
869 | }; |
870 | |
871 | /// Helper to access the operand that contains the unroll part for this recipe |
872 | /// after unrolling. |
873 | template <unsigned PartOpIdx> class VPUnrollPartAccessor { |
874 | protected: |
875 | /// Return the VPValue operand containing the unroll part or null if there is |
876 | /// no such operand. |
877 | VPValue *getUnrollPartOperand(VPUser &U) const; |
878 | |
879 | /// Return the unroll part. |
880 | unsigned getUnrollPart(VPUser &U) const; |
881 | }; |
882 | |
883 | /// Helper to manage IR metadata for recipes. It filters out metadata that |
884 | /// cannot be propagated. |
885 | class VPIRMetadata { |
886 | SmallVector<std::pair<unsigned, MDNode *>> Metadata; |
887 | |
888 | public: |
889 | VPIRMetadata() {} |
890 | |
891 | /// Adds metatadata that can be preserved from the original instruction |
892 | /// \p I. |
893 | VPIRMetadata(Instruction &I) { getMetadataToPropagate(Inst: &I, Metadata); } |
894 | |
895 | /// Adds metatadata that can be preserved from the original instruction |
896 | /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. |
897 | VPIRMetadata(Instruction &I, LoopVersioning *LVer); |
898 | |
899 | /// Copy constructor for cloning. |
900 | VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} |
901 | |
902 | /// Add all metadata to \p I. |
903 | void applyMetadata(Instruction &I) const; |
904 | |
905 | /// Add metadata with kind \p Kind and \p Node. |
906 | void addMetadata(unsigned Kind, MDNode *Node) { |
907 | Metadata.emplace_back(Args&: Kind, Args&: Node); |
908 | } |
909 | }; |
910 | |
911 | /// This is a concrete Recipe that models a single VPlan-level instruction. |
912 | /// While as any Recipe it may generate a sequence of IR instructions when |
913 | /// executed, these instructions would always form a single-def expression as |
914 | /// the VPInstruction is also a single def-use vertex. |
915 | class VPInstruction : public VPRecipeWithIRFlags, |
916 | public VPIRMetadata, |
917 | public VPUnrollPartAccessor<1> { |
918 | friend class VPlanSlp; |
919 | |
920 | public: |
921 | /// VPlan opcodes, extending LLVM IR with idiomatics instructions. |
922 | enum { |
923 | FirstOrderRecurrenceSplice = |
924 | Instruction::OtherOpsEnd + 1, // Combines the incoming and previous |
925 | // values of a first-order recurrence. |
926 | Not, |
927 | SLPLoad, |
928 | SLPStore, |
929 | ActiveLaneMask, |
930 | ExplicitVectorLength, |
931 | CalculateTripCountMinusVF, |
932 | // Increment the canonical IV separately for each unrolled part. |
933 | CanonicalIVIncrementForPart, |
934 | BranchOnCount, |
935 | BranchOnCond, |
936 | Broadcast, |
937 | /// Given operands of (the same) struct type, creates a struct of fixed- |
938 | /// width vectors each containing a struct field of all operands. The |
939 | /// number of operands matches the element count of every vector. |
940 | BuildStructVector, |
941 | /// Creates a fixed-width vector containing all operands. The number of |
942 | /// operands matches the vector element count. |
943 | BuildVector, |
944 | /// Compute the final result of a AnyOf reduction with select(cmp(),x,y), |
945 | /// where one of (x,y) is loop invariant, and both x and y are integer type. |
946 | ComputeAnyOfResult, |
947 | ComputeFindIVResult, |
948 | ComputeReductionResult, |
949 | // Extracts the last lane from its operand if it is a vector, or the last |
950 | // part if scalar. In the latter case, the recipe will be removed during |
951 | // unrolling. |
952 | , |
953 | // Extracts the second-to-last lane from its operand or the second-to-last |
954 | // part if it is scalar. In the latter case, the recipe will be removed |
955 | // during unrolling. |
956 | , |
957 | LogicalAnd, // Non-poison propagating logical And. |
958 | // Add an offset in bytes (second operand) to a base pointer (first |
959 | // operand). Only generates scalar values (either for the first lane only or |
960 | // for all lanes, depending on its uses). |
961 | PtrAdd, |
962 | // Returns a scalar boolean value, which is true if any lane of its |
963 | // (boolean) vector operands is true. It produces the reduced value across |
964 | // all unrolled iterations. Unrolling will add all copies of its original |
965 | // operand as additional operands. |
966 | AnyOf, |
967 | // Calculates the first active lane index of the vector predicate operands. |
968 | // It produces the lane index across all unrolled iterations. Unrolling will |
969 | // add all copies of its original operand as additional operands. |
970 | FirstActiveLane, |
971 | |
972 | // The opcodes below are used for VPInstructionWithType. |
973 | // |
974 | /// Scale the first operand (vector step) by the second operand |
975 | /// (scalar-step). Casts both operands to the result type if needed. |
976 | WideIVStep, |
977 | /// Start vector for reductions with 3 operands: the original start value, |
978 | /// the identity value for the reduction and an integer indicating the |
979 | /// scaling factor. |
980 | ReductionStartVector, |
981 | // Creates a step vector starting from 0 to VF with a step of 1. |
982 | StepVector, |
983 | |
984 | }; |
985 | |
986 | private: |
987 | typedef unsigned char OpcodeTy; |
988 | OpcodeTy Opcode; |
989 | |
990 | /// An optional name that can be used for the generated IR instruction. |
991 | const std::string Name; |
992 | |
993 | /// Returns true if this VPInstruction generates scalar values for all lanes. |
994 | /// Most VPInstructions generate a single value per part, either vector or |
995 | /// scalar. VPReplicateRecipe takes care of generating multiple (scalar) |
996 | /// values per all lanes, stemming from an original ingredient. This method |
997 | /// identifies the (rare) cases of VPInstructions that do so as well, w/o an |
998 | /// underlying ingredient. |
999 | bool doesGeneratePerAllLanes() const; |
1000 | |
1001 | /// Returns true if we can generate a scalar for the first lane only if |
1002 | /// needed. |
1003 | bool canGenerateScalarForFirstLane() const; |
1004 | |
1005 | /// Utility methods serving execute(): generates a single vector instance of |
1006 | /// the modeled instruction. \returns the generated value. . In some cases an |
1007 | /// existing value is returned rather than a generated one. |
1008 | Value *generate(VPTransformState &State); |
1009 | |
1010 | /// Utility methods serving execute(): generates a scalar single instance of |
1011 | /// the modeled instruction for a given lane. \returns the scalar generated |
1012 | /// value for lane \p Lane. |
1013 | Value *generatePerLane(VPTransformState &State, const VPLane &Lane); |
1014 | |
1015 | #if !defined(NDEBUG) |
1016 | /// Return the number of operands determined by the opcode of the |
1017 | /// VPInstruction. Returns -1u if the number of operands cannot be determined |
1018 | /// directly by the opcode. |
1019 | static unsigned getNumOperandsForOpcode(unsigned Opcode); |
1020 | #endif |
1021 | |
1022 | public: |
1023 | VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {}, |
1024 | const Twine &Name = "" ) |
1025 | : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), |
1026 | VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {} |
1027 | |
1028 | VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, |
1029 | const VPIRFlags &Flags, DebugLoc DL = {}, |
1030 | const Twine &Name = "" ); |
1031 | |
1032 | VP_CLASSOF_IMPL(VPDef::VPInstructionSC) |
1033 | |
1034 | VPInstruction *clone() override { |
1035 | SmallVector<VPValue *, 2> Operands(operands()); |
1036 | auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name); |
1037 | if (getUnderlyingValue()) |
1038 | New->setUnderlyingValue(getUnderlyingInstr()); |
1039 | return New; |
1040 | } |
1041 | |
1042 | unsigned getOpcode() const { return Opcode; } |
1043 | |
1044 | /// Generate the instruction. |
1045 | /// TODO: We currently execute only per-part unless a specific instance is |
1046 | /// provided. |
1047 | void execute(VPTransformState &State) override; |
1048 | |
1049 | /// Return the cost of this VPInstruction. |
1050 | InstructionCost computeCost(ElementCount VF, |
1051 | VPCostContext &Ctx) const override; |
1052 | |
1053 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1054 | /// Print the VPInstruction to \p O. |
1055 | void print(raw_ostream &O, const Twine &Indent, |
1056 | VPSlotTracker &SlotTracker) const override; |
1057 | |
1058 | /// Print the VPInstruction to dbgs() (for debugging). |
1059 | LLVM_DUMP_METHOD void dump() const; |
1060 | #endif |
1061 | |
1062 | bool hasResult() const { |
1063 | // CallInst may or may not have a result, depending on the called function. |
1064 | // Conservatively return calls have results for now. |
1065 | switch (getOpcode()) { |
1066 | case Instruction::Ret: |
1067 | case Instruction::Br: |
1068 | case Instruction::Store: |
1069 | case Instruction::Switch: |
1070 | case Instruction::IndirectBr: |
1071 | case Instruction::Resume: |
1072 | case Instruction::CatchRet: |
1073 | case Instruction::Unreachable: |
1074 | case Instruction::Fence: |
1075 | case Instruction::AtomicRMW: |
1076 | case VPInstruction::BranchOnCond: |
1077 | case VPInstruction::BranchOnCount: |
1078 | return false; |
1079 | default: |
1080 | return true; |
1081 | } |
1082 | } |
1083 | |
1084 | /// Returns true if the underlying opcode may read from or write to memory. |
1085 | bool opcodeMayReadOrWriteFromMemory() const; |
1086 | |
1087 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1088 | bool onlyFirstLaneUsed(const VPValue *Op) const override; |
1089 | |
1090 | /// Returns true if the recipe only uses the first part of operand \p Op. |
1091 | bool onlyFirstPartUsed(const VPValue *Op) const override; |
1092 | |
1093 | /// Returns true if this VPInstruction produces a scalar value from a vector, |
1094 | /// e.g. by performing a reduction or extracting a lane. |
1095 | bool isVectorToScalar() const; |
1096 | |
1097 | /// Returns true if this VPInstruction's operands are single scalars and the |
1098 | /// result is also a single scalar. |
1099 | bool isSingleScalar() const; |
1100 | |
1101 | /// Returns the symbolic name assigned to the VPInstruction. |
1102 | StringRef getName() const { return Name; } |
1103 | }; |
1104 | |
1105 | /// A specialization of VPInstruction augmenting it with a dedicated result |
1106 | /// type, to be used when the opcode and operands of the VPInstruction don't |
1107 | /// directly determine the result type. Note that there is no separate VPDef ID |
1108 | /// for VPInstructionWithType; it shares the same ID as VPInstruction and is |
1109 | /// distinguished purely by the opcode. |
1110 | class VPInstructionWithType : public VPInstruction { |
1111 | /// Scalar result type produced by the recipe. |
1112 | Type *ResultTy; |
1113 | |
1114 | public: |
1115 | VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands, |
1116 | Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, |
1117 | const Twine &Name = "" ) |
1118 | : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {} |
1119 | |
1120 | static inline bool classof(const VPRecipeBase *R) { |
1121 | // VPInstructionWithType are VPInstructions with specific opcodes requiring |
1122 | // type information. |
1123 | if (R->isScalarCast()) |
1124 | return true; |
1125 | auto *VPI = dyn_cast<VPInstruction>(Val: R); |
1126 | if (!VPI) |
1127 | return false; |
1128 | switch (VPI->getOpcode()) { |
1129 | case VPInstruction::WideIVStep: |
1130 | case VPInstruction::StepVector: |
1131 | return true; |
1132 | default: |
1133 | return false; |
1134 | } |
1135 | } |
1136 | |
1137 | static inline bool classof(const VPUser *R) { |
1138 | return isa<VPInstructionWithType>(Val: cast<VPRecipeBase>(Val: R)); |
1139 | } |
1140 | |
1141 | VPInstruction *clone() override { |
1142 | SmallVector<VPValue *, 2> Operands(operands()); |
1143 | auto *New = |
1144 | new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this, |
1145 | getDebugLoc(), getName()); |
1146 | New->setUnderlyingValue(getUnderlyingValue()); |
1147 | return New; |
1148 | } |
1149 | |
1150 | void execute(VPTransformState &State) override; |
1151 | |
1152 | /// Return the cost of this VPInstruction. |
1153 | InstructionCost computeCost(ElementCount VF, |
1154 | VPCostContext &Ctx) const override { |
1155 | // TODO: Compute accurate cost after retiring the legacy cost model. |
1156 | return 0; |
1157 | } |
1158 | |
1159 | Type *getResultType() const { return ResultTy; } |
1160 | |
1161 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1162 | /// Print the recipe. |
1163 | void print(raw_ostream &O, const Twine &Indent, |
1164 | VPSlotTracker &SlotTracker) const override; |
1165 | #endif |
1166 | }; |
1167 | |
1168 | /// Helper type to provide functions to access incoming values and blocks for |
1169 | /// phi-like recipes. |
1170 | class VPPhiAccessors { |
1171 | protected: |
1172 | /// Return a VPRecipeBase* to the current object. |
1173 | virtual const VPRecipeBase *getAsRecipe() const = 0; |
1174 | |
1175 | public: |
1176 | virtual ~VPPhiAccessors() = default; |
1177 | |
1178 | /// Returns the incoming VPValue with index \p Idx. |
1179 | VPValue *getIncomingValue(unsigned Idx) const { |
1180 | return getAsRecipe()->getOperand(N: Idx); |
1181 | } |
1182 | |
1183 | /// Returns the incoming block with index \p Idx. |
1184 | const VPBasicBlock *getIncomingBlock(unsigned Idx) const; |
1185 | |
1186 | /// Returns the number of incoming values, also number of incoming blocks. |
1187 | virtual unsigned getNumIncoming() const { |
1188 | return getAsRecipe()->getNumOperands(); |
1189 | } |
1190 | |
1191 | /// Removes the incoming value for \p IncomingBlock, which must be a |
1192 | /// predecessor. |
1193 | void removeIncomingValueFor(VPBlockBase *IncomingBlock) const; |
1194 | |
1195 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1196 | /// Print the recipe. |
1197 | void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const; |
1198 | #endif |
1199 | }; |
1200 | |
1201 | struct VPPhi : public VPInstruction, public VPPhiAccessors { |
1202 | VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "" ) |
1203 | : VPInstruction(Instruction::PHI, Operands, DL, Name) {} |
1204 | |
1205 | static inline bool classof(const VPUser *U) { |
1206 | auto *R = dyn_cast<VPInstruction>(Val: U); |
1207 | return R && R->getOpcode() == Instruction::PHI; |
1208 | } |
1209 | |
1210 | VPPhi *clone() override { |
1211 | return new VPPhi(operands(), getDebugLoc(), getName()); |
1212 | } |
1213 | |
1214 | void execute(VPTransformState &State) override; |
1215 | |
1216 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1217 | /// Print the recipe. |
1218 | void print(raw_ostream &O, const Twine &Indent, |
1219 | VPSlotTracker &SlotTracker) const override; |
1220 | #endif |
1221 | |
1222 | protected: |
1223 | const VPRecipeBase *getAsRecipe() const override { return this; } |
1224 | }; |
1225 | |
1226 | /// A recipe to wrap on original IR instruction not to be modified during |
1227 | /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass. |
1228 | /// Expect PHIs, VPIRInstructions cannot have any operands. |
1229 | class VPIRInstruction : public VPRecipeBase { |
1230 | Instruction &I; |
1231 | |
1232 | protected: |
1233 | /// VPIRInstruction::create() should be used to create VPIRInstructions, as |
1234 | /// subclasses may need to be created, e.g. VPIRPhi. |
1235 | VPIRInstruction(Instruction &I) |
1236 | : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {} |
1237 | |
1238 | public: |
1239 | ~VPIRInstruction() override = default; |
1240 | |
1241 | /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a |
1242 | /// VPIRInstruction. |
1243 | static VPIRInstruction *create(Instruction &I); |
1244 | |
1245 | VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) |
1246 | |
1247 | VPIRInstruction *clone() override { |
1248 | auto *R = create(I); |
1249 | for (auto *Op : operands()) |
1250 | R->addOperand(Operand: Op); |
1251 | return R; |
1252 | } |
1253 | |
1254 | void execute(VPTransformState &State) override; |
1255 | |
1256 | /// Return the cost of this VPIRInstruction. |
1257 | InstructionCost computeCost(ElementCount VF, |
1258 | VPCostContext &Ctx) const override; |
1259 | |
1260 | Instruction &getInstruction() const { return I; } |
1261 | |
1262 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1263 | /// Print the recipe. |
1264 | void print(raw_ostream &O, const Twine &Indent, |
1265 | VPSlotTracker &SlotTracker) const override; |
1266 | #endif |
1267 | |
1268 | bool usesScalars(const VPValue *Op) const override { |
1269 | assert(is_contained(operands(), Op) && |
1270 | "Op must be an operand of the recipe" ); |
1271 | return true; |
1272 | } |
1273 | |
1274 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
1275 | assert(is_contained(operands(), Op) && |
1276 | "Op must be an operand of the recipe" ); |
1277 | return true; |
1278 | } |
1279 | |
1280 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1281 | assert(is_contained(operands(), Op) && |
1282 | "Op must be an operand of the recipe" ); |
1283 | return true; |
1284 | } |
1285 | |
1286 | /// Update the recipes first operand to the last lane of the operand using \p |
1287 | /// Builder. Must only be used for VPIRInstructions with at least one operand |
1288 | /// wrapping a PHINode. |
1289 | void extractLastLaneOfFirstOperand(VPBuilder &Builder); |
1290 | }; |
1291 | |
1292 | /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use |
1293 | /// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is |
1294 | /// allowed, and it is used to add a new incoming value for the single |
1295 | /// predecessor VPBB. |
1296 | struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { |
1297 | VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {} |
1298 | |
1299 | static inline bool classof(const VPRecipeBase *U) { |
1300 | auto *R = dyn_cast<VPIRInstruction>(Val: U); |
1301 | return R && isa<PHINode>(Val: R->getInstruction()); |
1302 | } |
1303 | |
1304 | PHINode &getIRPhi() { return cast<PHINode>(Val&: getInstruction()); } |
1305 | |
1306 | void execute(VPTransformState &State) override; |
1307 | |
1308 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1309 | /// Print the recipe. |
1310 | void print(raw_ostream &O, const Twine &Indent, |
1311 | VPSlotTracker &SlotTracker) const override; |
1312 | #endif |
1313 | |
1314 | protected: |
1315 | const VPRecipeBase *getAsRecipe() const override { return this; } |
1316 | }; |
1317 | |
1318 | /// VPWidenRecipe is a recipe for producing a widened instruction using the |
1319 | /// opcode and operands of the recipe. This recipe covers most of the |
1320 | /// traditional vectorization cases where each recipe transforms into a |
1321 | /// vectorized version of itself. |
1322 | class VPWidenRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
1323 | unsigned Opcode; |
1324 | |
1325 | public: |
1326 | VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands, |
1327 | const VPIRFlags &Flags, DebugLoc DL) |
1328 | : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), |
1329 | Opcode(Opcode) {} |
1330 | |
1331 | VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands) |
1332 | : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I), |
1333 | Opcode(I.getOpcode()) {} |
1334 | |
1335 | ~VPWidenRecipe() override = default; |
1336 | |
1337 | VPWidenRecipe *clone() override { |
1338 | auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands()); |
1339 | R->transferFlags(Other&: *this); |
1340 | return R; |
1341 | } |
1342 | |
1343 | VP_CLASSOF_IMPL(VPDef::VPWidenSC) |
1344 | |
1345 | /// Produce a widened instruction using the opcode and operands of the recipe, |
1346 | /// processing State.VF elements. |
1347 | void execute(VPTransformState &State) override; |
1348 | |
1349 | /// Return the cost of this VPWidenRecipe. |
1350 | InstructionCost computeCost(ElementCount VF, |
1351 | VPCostContext &Ctx) const override; |
1352 | |
1353 | unsigned getOpcode() const { return Opcode; } |
1354 | |
1355 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1356 | /// Print the recipe. |
1357 | void print(raw_ostream &O, const Twine &Indent, |
1358 | VPSlotTracker &SlotTracker) const override; |
1359 | #endif |
1360 | }; |
1361 | |
1362 | /// VPWidenCastRecipe is a recipe to create vector cast instructions. |
1363 | class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
1364 | /// Cast instruction opcode. |
1365 | Instruction::CastOps Opcode; |
1366 | |
1367 | /// Result type for the cast. |
1368 | Type *ResultTy; |
1369 | |
1370 | public: |
1371 | VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, |
1372 | CastInst &UI) |
1373 | : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI), |
1374 | Opcode(Opcode), ResultTy(ResultTy) { |
1375 | assert(UI.getOpcode() == Opcode && |
1376 | "opcode of underlying cast doesn't match" ); |
1377 | } |
1378 | |
1379 | VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, |
1380 | const VPIRFlags &Flags = {}, DebugLoc DL = {}) |
1381 | : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), |
1382 | VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) { |
1383 | assert(flagsValidForOpcode(Opcode) && |
1384 | "Set flags not supported for the provided opcode" ); |
1385 | } |
1386 | |
1387 | ~VPWidenCastRecipe() override = default; |
1388 | |
1389 | VPWidenCastRecipe *clone() override { |
1390 | if (auto *UV = getUnderlyingValue()) |
1391 | return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy, |
1392 | *cast<CastInst>(Val: UV)); |
1393 | |
1394 | return new VPWidenCastRecipe(Opcode, getOperand(N: 0), ResultTy); |
1395 | } |
1396 | |
1397 | VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) |
1398 | |
1399 | /// Produce widened copies of the cast. |
1400 | void execute(VPTransformState &State) override; |
1401 | |
1402 | /// Return the cost of this VPWidenCastRecipe. |
1403 | InstructionCost computeCost(ElementCount VF, |
1404 | VPCostContext &Ctx) const override; |
1405 | |
1406 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1407 | /// Print the recipe. |
1408 | void print(raw_ostream &O, const Twine &Indent, |
1409 | VPSlotTracker &SlotTracker) const override; |
1410 | #endif |
1411 | |
1412 | Instruction::CastOps getOpcode() const { return Opcode; } |
1413 | |
1414 | /// Returns the result type of the cast. |
1415 | Type *getResultType() const { return ResultTy; } |
1416 | }; |
1417 | |
1418 | /// A recipe for widening vector intrinsics. |
1419 | class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
1420 | /// ID of the vector intrinsic to widen. |
1421 | Intrinsic::ID VectorIntrinsicID; |
1422 | |
1423 | /// Scalar return type of the intrinsic. |
1424 | Type *ResultTy; |
1425 | |
1426 | /// True if the intrinsic may read from memory. |
1427 | bool MayReadFromMemory; |
1428 | |
1429 | /// True if the intrinsic may read write to memory. |
1430 | bool MayWriteToMemory; |
1431 | |
1432 | /// True if the intrinsic may have side-effects. |
1433 | bool MayHaveSideEffects; |
1434 | |
1435 | public: |
1436 | VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, |
1437 | ArrayRef<VPValue *> CallArguments, Type *Ty, |
1438 | DebugLoc DL = {}) |
1439 | : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), |
1440 | VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), |
1441 | MayReadFromMemory(CI.mayReadFromMemory()), |
1442 | MayWriteToMemory(CI.mayWriteToMemory()), |
1443 | MayHaveSideEffects(CI.mayHaveSideEffects()) {} |
1444 | |
1445 | VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, |
1446 | ArrayRef<VPValue *> CallArguments, Type *Ty, |
1447 | DebugLoc DL = {}) |
1448 | : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL), |
1449 | VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { |
1450 | LLVMContext &Ctx = Ty->getContext(); |
1451 | AttributeSet Attrs = Intrinsic::getFnAttributes(C&: Ctx, id: VectorIntrinsicID); |
1452 | MemoryEffects ME = Attrs.getMemoryEffects(); |
1453 | MayReadFromMemory = !ME.onlyWritesMemory(); |
1454 | MayWriteToMemory = !ME.onlyReadsMemory(); |
1455 | MayHaveSideEffects = MayWriteToMemory || |
1456 | !Attrs.hasAttribute(Kind: Attribute::NoUnwind) || |
1457 | !Attrs.hasAttribute(Kind: Attribute::WillReturn); |
1458 | } |
1459 | |
1460 | ~VPWidenIntrinsicRecipe() override = default; |
1461 | |
1462 | VPWidenIntrinsicRecipe *clone() override { |
1463 | if (Value *CI = getUnderlyingValue()) |
1464 | return new VPWidenIntrinsicRecipe(*cast<CallInst>(Val: CI), VectorIntrinsicID, |
1465 | operands(), ResultTy, getDebugLoc()); |
1466 | return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, |
1467 | getDebugLoc()); |
1468 | } |
1469 | |
1470 | VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC) |
1471 | |
1472 | /// Produce a widened version of the vector intrinsic. |
1473 | void execute(VPTransformState &State) override; |
1474 | |
1475 | /// Return the cost of this vector intrinsic. |
1476 | InstructionCost computeCost(ElementCount VF, |
1477 | VPCostContext &Ctx) const override; |
1478 | |
1479 | /// Return the ID of the intrinsic. |
1480 | Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; } |
1481 | |
1482 | /// Return the scalar return type of the intrinsic. |
1483 | Type *getResultType() const { return ResultTy; } |
1484 | |
1485 | /// Return to name of the intrinsic as string. |
1486 | StringRef getIntrinsicName() const; |
1487 | |
1488 | /// Returns true if the intrinsic may read from memory. |
1489 | bool mayReadFromMemory() const { return MayReadFromMemory; } |
1490 | |
1491 | /// Returns true if the intrinsic may write to memory. |
1492 | bool mayWriteToMemory() const { return MayWriteToMemory; } |
1493 | |
1494 | /// Returns true if the intrinsic may have side-effects. |
1495 | bool mayHaveSideEffects() const { return MayHaveSideEffects; } |
1496 | |
1497 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1498 | /// Print the recipe. |
1499 | void print(raw_ostream &O, const Twine &Indent, |
1500 | VPSlotTracker &SlotTracker) const override; |
1501 | #endif |
1502 | |
1503 | bool onlyFirstLaneUsed(const VPValue *Op) const override; |
1504 | }; |
1505 | |
1506 | /// A recipe for widening Call instructions using library calls. |
1507 | class VPWidenCallRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
1508 | /// Variant stores a pointer to the chosen function. There is a 1:1 mapping |
1509 | /// between a given VF and the chosen vectorized variant, so there will be a |
1510 | /// different VPlan for each VF with a valid variant. |
1511 | Function *Variant; |
1512 | |
1513 | public: |
1514 | VPWidenCallRecipe(Value *UV, Function *Variant, |
1515 | ArrayRef<VPValue *> CallArguments, DebugLoc DL = {}) |
1516 | : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, |
1517 | *cast<Instruction>(Val: UV)), |
1518 | VPIRMetadata(*cast<Instruction>(Val: UV)), Variant(Variant) { |
1519 | assert( |
1520 | isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && |
1521 | "last operand must be the called function" ); |
1522 | } |
1523 | |
1524 | ~VPWidenCallRecipe() override = default; |
1525 | |
1526 | VPWidenCallRecipe *clone() override { |
1527 | return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), |
1528 | getDebugLoc()); |
1529 | } |
1530 | |
1531 | VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) |
1532 | |
1533 | /// Produce a widened version of the call instruction. |
1534 | void execute(VPTransformState &State) override; |
1535 | |
1536 | /// Return the cost of this VPWidenCallRecipe. |
1537 | InstructionCost computeCost(ElementCount VF, |
1538 | VPCostContext &Ctx) const override; |
1539 | |
1540 | Function *getCalledScalarFunction() const { |
1541 | return cast<Function>(Val: getOperand(N: getNumOperands() - 1)->getLiveInIRValue()); |
1542 | } |
1543 | |
1544 | operand_range args() { return make_range(x: op_begin(), y: std::prev(x: op_end())); } |
1545 | const_operand_range args() const { |
1546 | return make_range(x: op_begin(), y: std::prev(x: op_end())); |
1547 | } |
1548 | |
1549 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1550 | /// Print the recipe. |
1551 | void print(raw_ostream &O, const Twine &Indent, |
1552 | VPSlotTracker &SlotTracker) const override; |
1553 | #endif |
1554 | }; |
1555 | |
1556 | /// A recipe representing a sequence of load -> update -> store as part of |
1557 | /// a histogram operation. This means there may be aliasing between vector |
1558 | /// lanes, which is handled by the llvm.experimental.vector.histogram family |
1559 | /// of intrinsics. The only update operations currently supported are |
1560 | /// 'add' and 'sub' where the other term is loop-invariant. |
1561 | class VPHistogramRecipe : public VPRecipeBase { |
1562 | /// Opcode of the update operation, currently either add or sub. |
1563 | unsigned Opcode; |
1564 | |
1565 | public: |
1566 | VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands, |
1567 | DebugLoc DL = {}) |
1568 | : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {} |
1569 | |
1570 | ~VPHistogramRecipe() override = default; |
1571 | |
1572 | VPHistogramRecipe *clone() override { |
1573 | return new VPHistogramRecipe(Opcode, operands(), getDebugLoc()); |
1574 | } |
1575 | |
1576 | VP_CLASSOF_IMPL(VPDef::VPHistogramSC); |
1577 | |
1578 | /// Produce a vectorized histogram operation. |
1579 | void execute(VPTransformState &State) override; |
1580 | |
1581 | /// Return the cost of this VPHistogramRecipe. |
1582 | InstructionCost computeCost(ElementCount VF, |
1583 | VPCostContext &Ctx) const override; |
1584 | |
1585 | unsigned getOpcode() const { return Opcode; } |
1586 | |
1587 | /// Return the mask operand if one was provided, or a null pointer if all |
1588 | /// lanes should be executed unconditionally. |
1589 | VPValue *getMask() const { |
1590 | return getNumOperands() == 3 ? getOperand(N: 2) : nullptr; |
1591 | } |
1592 | |
1593 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1594 | /// Print the recipe |
1595 | void print(raw_ostream &O, const Twine &Indent, |
1596 | VPSlotTracker &SlotTracker) const override; |
1597 | #endif |
1598 | }; |
1599 | |
1600 | /// A recipe for widening select instructions. |
1601 | struct VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
1602 | VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands) |
1603 | : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), |
1604 | VPIRMetadata(I) {} |
1605 | |
1606 | ~VPWidenSelectRecipe() override = default; |
1607 | |
1608 | VPWidenSelectRecipe *clone() override { |
1609 | return new VPWidenSelectRecipe(*cast<SelectInst>(Val: getUnderlyingInstr()), |
1610 | operands()); |
1611 | } |
1612 | |
1613 | VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) |
1614 | |
1615 | /// Produce a widened version of the select instruction. |
1616 | void execute(VPTransformState &State) override; |
1617 | |
1618 | /// Return the cost of this VPWidenSelectRecipe. |
1619 | InstructionCost computeCost(ElementCount VF, |
1620 | VPCostContext &Ctx) const override; |
1621 | |
1622 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1623 | /// Print the recipe. |
1624 | void print(raw_ostream &O, const Twine &Indent, |
1625 | VPSlotTracker &SlotTracker) const override; |
1626 | #endif |
1627 | |
1628 | VPValue *getCond() const { |
1629 | return getOperand(N: 0); |
1630 | } |
1631 | |
1632 | bool isInvariantCond() const { |
1633 | return getCond()->isDefinedOutsideLoopRegions(); |
1634 | } |
1635 | |
1636 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1637 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1638 | assert(is_contained(operands(), Op) && |
1639 | "Op must be an operand of the recipe" ); |
1640 | return Op == getCond() && isInvariantCond(); |
1641 | } |
1642 | }; |
1643 | |
1644 | /// A recipe for handling GEP instructions. |
1645 | class VPWidenGEPRecipe : public VPRecipeWithIRFlags { |
1646 | bool isPointerLoopInvariant() const { |
1647 | return getOperand(N: 0)->isDefinedOutsideLoopRegions(); |
1648 | } |
1649 | |
1650 | bool isIndexLoopInvariant(unsigned I) const { |
1651 | return getOperand(N: I + 1)->isDefinedOutsideLoopRegions(); |
1652 | } |
1653 | |
1654 | bool areAllOperandsInvariant() const { |
1655 | return all_of(Range: operands(), P: [](VPValue *Op) { |
1656 | return Op->isDefinedOutsideLoopRegions(); |
1657 | }); |
1658 | } |
1659 | |
1660 | public: |
1661 | VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands) |
1662 | : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) { |
1663 | SmallVector<std::pair<unsigned, MDNode *>> Metadata; |
1664 | (void)Metadata; |
1665 | getMetadataToPropagate(Inst: GEP, Metadata); |
1666 | assert(Metadata.empty() && "unexpected metadata on GEP" ); |
1667 | } |
1668 | |
1669 | ~VPWidenGEPRecipe() override = default; |
1670 | |
1671 | VPWidenGEPRecipe *clone() override { |
1672 | return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Val: getUnderlyingInstr()), |
1673 | operands()); |
1674 | } |
1675 | |
1676 | VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) |
1677 | |
1678 | /// Generate the gep nodes. |
1679 | void execute(VPTransformState &State) override; |
1680 | |
1681 | /// Return the cost of this VPWidenGEPRecipe. |
1682 | InstructionCost computeCost(ElementCount VF, |
1683 | VPCostContext &Ctx) const override { |
1684 | // TODO: Compute accurate cost after retiring the legacy cost model. |
1685 | return 0; |
1686 | } |
1687 | |
1688 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1689 | /// Print the recipe. |
1690 | void print(raw_ostream &O, const Twine &Indent, |
1691 | VPSlotTracker &SlotTracker) const override; |
1692 | #endif |
1693 | |
1694 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1695 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1696 | assert(is_contained(operands(), Op) && |
1697 | "Op must be an operand of the recipe" ); |
1698 | if (Op == getOperand(N: 0)) |
1699 | return isPointerLoopInvariant(); |
1700 | else |
1701 | return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions(); |
1702 | } |
1703 | }; |
1704 | |
1705 | /// A recipe to compute a pointer to the last element of each part of a widened |
1706 | /// memory access for widened memory accesses of IndexedTy. Used for |
1707 | /// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. |
1708 | class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, |
1709 | public VPUnrollPartAccessor<2> { |
1710 | Type *IndexedTy; |
1711 | |
1712 | /// The constant stride of the pointer computed by this recipe, expressed in |
1713 | /// units of IndexedTy. |
1714 | int64_t Stride; |
1715 | |
1716 | public: |
1717 | VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, |
1718 | int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL) |
1719 | : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC, |
1720 | ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL), |
1721 | IndexedTy(IndexedTy), Stride(Stride) { |
1722 | assert(Stride < 0 && "Stride must be negative" ); |
1723 | } |
1724 | |
1725 | VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC) |
1726 | |
1727 | VPValue *getVFValue() { return getOperand(N: 1); } |
1728 | const VPValue *getVFValue() const { return getOperand(N: 1); } |
1729 | |
1730 | void execute(VPTransformState &State) override; |
1731 | |
1732 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1733 | assert(is_contained(operands(), Op) && |
1734 | "Op must be an operand of the recipe" ); |
1735 | return true; |
1736 | } |
1737 | |
1738 | /// Return the cost of this VPVectorPointerRecipe. |
1739 | InstructionCost computeCost(ElementCount VF, |
1740 | VPCostContext &Ctx) const override { |
1741 | // TODO: Compute accurate cost after retiring the legacy cost model. |
1742 | return 0; |
1743 | } |
1744 | |
1745 | /// Returns true if the recipe only uses the first part of operand \p Op. |
1746 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
1747 | assert(is_contained(operands(), Op) && |
1748 | "Op must be an operand of the recipe" ); |
1749 | assert(getNumOperands() <= 2 && "must have at most two operands" ); |
1750 | return true; |
1751 | } |
1752 | |
1753 | VPVectorEndPointerRecipe *clone() override { |
1754 | return new VPVectorEndPointerRecipe(getOperand(N: 0), getVFValue(), IndexedTy, |
1755 | Stride, getGEPNoWrapFlags(), |
1756 | getDebugLoc()); |
1757 | } |
1758 | |
1759 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1760 | /// Print the recipe. |
1761 | void print(raw_ostream &O, const Twine &Indent, |
1762 | VPSlotTracker &SlotTracker) const override; |
1763 | #endif |
1764 | }; |
1765 | |
1766 | /// A recipe to compute the pointers for widened memory accesses of IndexTy. |
1767 | class VPVectorPointerRecipe : public VPRecipeWithIRFlags, |
1768 | public VPUnrollPartAccessor<1> { |
1769 | Type *IndexedTy; |
1770 | |
1771 | public: |
1772 | VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, |
1773 | DebugLoc DL) |
1774 | : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), |
1775 | GEPFlags, DL), |
1776 | IndexedTy(IndexedTy) {} |
1777 | |
1778 | VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) |
1779 | |
1780 | void execute(VPTransformState &State) override; |
1781 | |
1782 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1783 | assert(is_contained(operands(), Op) && |
1784 | "Op must be an operand of the recipe" ); |
1785 | return true; |
1786 | } |
1787 | |
1788 | /// Returns true if the recipe only uses the first part of operand \p Op. |
1789 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
1790 | assert(is_contained(operands(), Op) && |
1791 | "Op must be an operand of the recipe" ); |
1792 | assert(getNumOperands() <= 2 && "must have at most two operands" ); |
1793 | return true; |
1794 | } |
1795 | |
1796 | VPVectorPointerRecipe *clone() override { |
1797 | return new VPVectorPointerRecipe(getOperand(N: 0), IndexedTy, |
1798 | getGEPNoWrapFlags(), getDebugLoc()); |
1799 | } |
1800 | |
1801 | /// Return the cost of this VPHeaderPHIRecipe. |
1802 | InstructionCost computeCost(ElementCount VF, |
1803 | VPCostContext &Ctx) const override { |
1804 | // TODO: Compute accurate cost after retiring the legacy cost model. |
1805 | return 0; |
1806 | } |
1807 | |
1808 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1809 | /// Print the recipe. |
1810 | void print(raw_ostream &O, const Twine &Indent, |
1811 | VPSlotTracker &SlotTracker) const override; |
1812 | #endif |
1813 | }; |
1814 | |
1815 | /// A pure virtual base class for all recipes modeling header phis, including |
1816 | /// phis for first order recurrences, pointer inductions and reductions. The |
1817 | /// start value is the first operand of the recipe and the incoming value from |
1818 | /// the backedge is the second operand. |
1819 | /// |
1820 | /// Inductions are modeled using the following sub-classes: |
1821 | /// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop, |
1822 | /// starting at a specified value (zero for the main vector loop, the resume |
1823 | /// value for the epilogue vector loop) and stepping by 1. The induction |
1824 | /// controls exiting of the vector loop by comparing against the vector trip |
1825 | /// count. Produces a single scalar PHI for the induction value per |
1826 | /// iteration. |
1827 | /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and |
1828 | /// floating point inductions with arbitrary start and step values. Produces |
1829 | /// a vector PHI per-part. |
1830 | /// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding |
1831 | /// value of an IV with different start and step values. Produces a single |
1832 | /// scalar value per iteration |
1833 | /// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a |
1834 | /// canonical or derived induction. |
1835 | /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a |
1836 | /// pointer induction. Produces either a vector PHI per-part or scalar values |
1837 | /// per-lane based on the canonical induction. |
1838 | class : public VPSingleDefRecipe, public VPPhiAccessors { |
1839 | protected: |
1840 | (unsigned char VPDefID, Instruction *UnderlyingInstr, |
1841 | VPValue *Start, DebugLoc DL = DebugLoc::getUnknown()) |
1842 | : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}), |
1843 | UnderlyingInstr, DL) {} |
1844 | |
1845 | const VPRecipeBase *() const override { return this; } |
1846 | |
1847 | public: |
1848 | () override = default; |
1849 | |
1850 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
1851 | static inline bool (const VPRecipeBase *B) { |
1852 | return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && |
1853 | B->getVPDefID() <= VPDef::VPLastHeaderPHISC; |
1854 | } |
1855 | static inline bool (const VPValue *V) { |
1856 | auto *B = V->getDefiningRecipe(); |
1857 | return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC && |
1858 | B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC; |
1859 | } |
1860 | |
1861 | /// Generate the phi nodes. |
1862 | void (VPTransformState &State) override = 0; |
1863 | |
1864 | /// Return the cost of this header phi recipe. |
1865 | InstructionCost (ElementCount VF, |
1866 | VPCostContext &Ctx) const override; |
1867 | |
1868 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1869 | /// Print the recipe. |
1870 | void print(raw_ostream &O, const Twine &Indent, |
1871 | VPSlotTracker &SlotTracker) const override = 0; |
1872 | #endif |
1873 | |
1874 | /// Returns the start value of the phi, if one is set. |
1875 | VPValue *() { |
1876 | return getNumOperands() == 0 ? nullptr : getOperand(N: 0); |
1877 | } |
1878 | VPValue *() const { |
1879 | return getNumOperands() == 0 ? nullptr : getOperand(N: 0); |
1880 | } |
1881 | |
1882 | /// Update the start value of the recipe. |
1883 | void (VPValue *V) { setOperand(I: 0, New: V); } |
1884 | |
1885 | /// Returns the incoming value from the loop backedge. |
1886 | virtual VPValue *() { |
1887 | return getOperand(N: 1); |
1888 | } |
1889 | |
1890 | /// Returns the backedge value as a recipe. The backedge value is guaranteed |
1891 | /// to be a recipe. |
1892 | virtual VPRecipeBase &() { |
1893 | return *getBackedgeValue()->getDefiningRecipe(); |
1894 | } |
1895 | }; |
1896 | |
1897 | /// Base class for widened induction (VPWidenIntOrFpInductionRecipe and |
1898 | /// VPWidenPointerInductionRecipe), providing shared functionality, including |
1899 | /// retrieving the step value, induction descriptor and original phi node. |
1900 | class VPWidenInductionRecipe : public VPHeaderPHIRecipe { |
1901 | const InductionDescriptor &IndDesc; |
1902 | |
1903 | public: |
1904 | VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, |
1905 | VPValue *Step, const InductionDescriptor &IndDesc, |
1906 | DebugLoc DL) |
1907 | : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) { |
1908 | addOperand(Operand: Step); |
1909 | } |
1910 | |
1911 | static inline bool classof(const VPRecipeBase *R) { |
1912 | return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC || |
1913 | R->getVPDefID() == VPDef::VPWidenPointerInductionSC; |
1914 | } |
1915 | |
1916 | static inline bool classof(const VPValue *V) { |
1917 | auto *R = V->getDefiningRecipe(); |
1918 | return R && classof(R); |
1919 | } |
1920 | |
1921 | static inline bool (const VPHeaderPHIRecipe *R) { |
1922 | return classof(R: static_cast<const VPRecipeBase *>(R)); |
1923 | } |
1924 | |
1925 | virtual void execute(VPTransformState &State) override = 0; |
1926 | |
1927 | /// Returns the step value of the induction. |
1928 | VPValue *getStepValue() { return getOperand(N: 1); } |
1929 | const VPValue *getStepValue() const { return getOperand(N: 1); } |
1930 | |
1931 | /// Update the step value of the recipe. |
1932 | void setStepValue(VPValue *V) { setOperand(I: 1, New: V); } |
1933 | |
1934 | /// Returns the number of incoming values, also number of incoming blocks. |
1935 | /// Note that at the moment, VPWidenPointerInductionRecipe only has a single |
1936 | /// incoming value, its start value. |
1937 | unsigned getNumIncoming() const override { return 1; } |
1938 | |
1939 | PHINode *getPHINode() const { return cast<PHINode>(Val: getUnderlyingValue()); } |
1940 | |
1941 | /// Returns the induction descriptor for the recipe. |
1942 | const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } |
1943 | |
1944 | VPValue *getBackedgeValue() override { |
1945 | // TODO: All operands of base recipe must exist and be at same index in |
1946 | // derived recipe. |
1947 | llvm_unreachable( |
1948 | "VPWidenIntOrFpInductionRecipe generates its own backedge value" ); |
1949 | } |
1950 | |
1951 | VPRecipeBase &getBackedgeRecipe() override { |
1952 | // TODO: All operands of base recipe must exist and be at same index in |
1953 | // derived recipe. |
1954 | llvm_unreachable( |
1955 | "VPWidenIntOrFpInductionRecipe generates its own backedge value" ); |
1956 | } |
1957 | |
1958 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
1959 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
1960 | assert(is_contained(operands(), Op) && |
1961 | "Op must be an operand of the recipe" ); |
1962 | // The recipe creates its own wide start value, so it only requests the |
1963 | // first lane of the operand. |
1964 | // TODO: Remove once creating the start value is modeled separately. |
1965 | return Op == getStartValue() || Op == getStepValue(); |
1966 | } |
1967 | }; |
1968 | |
1969 | /// A recipe for handling phi nodes of integer and floating-point inductions, |
1970 | /// producing their vector values. This is an abstract recipe and must be |
1971 | /// converted to concrete recipes before executing. |
1972 | class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { |
1973 | TruncInst *Trunc; |
1974 | |
1975 | // If this recipe is unrolled it will have 2 additional operands. |
1976 | bool isUnrolled() const { return getNumOperands() == 5; } |
1977 | |
1978 | public: |
1979 | VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, |
1980 | VPValue *VF, const InductionDescriptor &IndDesc, |
1981 | DebugLoc DL) |
1982 | : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, |
1983 | Step, IndDesc, DL), |
1984 | Trunc(nullptr) { |
1985 | addOperand(Operand: VF); |
1986 | } |
1987 | |
1988 | VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, |
1989 | VPValue *VF, const InductionDescriptor &IndDesc, |
1990 | TruncInst *Trunc, DebugLoc DL) |
1991 | : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, |
1992 | Step, IndDesc, DL), |
1993 | Trunc(Trunc) { |
1994 | addOperand(Operand: VF); |
1995 | SmallVector<std::pair<unsigned, MDNode *>> Metadata; |
1996 | (void)Metadata; |
1997 | if (Trunc) |
1998 | getMetadataToPropagate(Inst: Trunc, Metadata); |
1999 | assert(Metadata.empty() && "unexpected metadata on Trunc" ); |
2000 | } |
2001 | |
2002 | ~VPWidenIntOrFpInductionRecipe() override = default; |
2003 | |
2004 | VPWidenIntOrFpInductionRecipe *clone() override { |
2005 | return new VPWidenIntOrFpInductionRecipe( |
2006 | getPHINode(), getStartValue(), getStepValue(), getVFValue(), |
2007 | getInductionDescriptor(), Trunc, getDebugLoc()); |
2008 | } |
2009 | |
2010 | VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) |
2011 | |
2012 | void execute(VPTransformState &State) override { |
2013 | llvm_unreachable("cannot execute this recipe, should be expanded via " |
2014 | "expandVPWidenIntOrFpInductionRecipe" ); |
2015 | } |
2016 | |
2017 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2018 | /// Print the recipe. |
2019 | void print(raw_ostream &O, const Twine &Indent, |
2020 | VPSlotTracker &SlotTracker) const override; |
2021 | #endif |
2022 | |
2023 | VPValue *getVFValue() { return getOperand(N: 2); } |
2024 | const VPValue *getVFValue() const { return getOperand(N: 2); } |
2025 | |
2026 | VPValue *getSplatVFValue() { |
2027 | // If the recipe has been unrolled return the VPValue for the induction |
2028 | // increment. |
2029 | return isUnrolled() ? getOperand(N: getNumOperands() - 2) : nullptr; |
2030 | } |
2031 | |
2032 | /// Returns the number of incoming values, also number of incoming blocks. |
2033 | /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single |
2034 | /// incoming value, its start value. |
2035 | unsigned getNumIncoming() const override { return 1; } |
2036 | |
2037 | /// Returns the first defined value as TruncInst, if it is one or nullptr |
2038 | /// otherwise. |
2039 | TruncInst *getTruncInst() { return Trunc; } |
2040 | const TruncInst *getTruncInst() const { return Trunc; } |
2041 | |
2042 | /// Returns true if the induction is canonical, i.e. starting at 0 and |
2043 | /// incremented by UF * VF (= the original IV is incremented by 1) and has the |
2044 | /// same type as the canonical induction. |
2045 | bool isCanonical() const; |
2046 | |
2047 | /// Returns the scalar type of the induction. |
2048 | Type *getScalarType() const { |
2049 | return Trunc ? Trunc->getType() |
2050 | : getStartValue()->getLiveInIRValue()->getType(); |
2051 | } |
2052 | |
2053 | /// Returns the VPValue representing the value of this induction at |
2054 | /// the last unrolled part, if it exists. Returns itself if unrolling did not |
2055 | /// take place. |
2056 | VPValue *getLastUnrolledPartOperand() { |
2057 | return isUnrolled() ? getOperand(N: getNumOperands() - 1) : this; |
2058 | } |
2059 | }; |
2060 | |
2061 | class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, |
2062 | public VPUnrollPartAccessor<4> { |
2063 | bool IsScalarAfterVectorization; |
2064 | |
2065 | public: |
2066 | /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p |
2067 | /// Start and the number of elements unrolled \p NumUnrolledElems, typically |
2068 | /// VF*UF. |
2069 | VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, |
2070 | VPValue *NumUnrolledElems, |
2071 | const InductionDescriptor &IndDesc, |
2072 | bool IsScalarAfterVectorization, DebugLoc DL) |
2073 | : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start, |
2074 | Step, IndDesc, DL), |
2075 | IsScalarAfterVectorization(IsScalarAfterVectorization) { |
2076 | addOperand(Operand: NumUnrolledElems); |
2077 | } |
2078 | |
2079 | ~VPWidenPointerInductionRecipe() override = default; |
2080 | |
2081 | VPWidenPointerInductionRecipe *clone() override { |
2082 | return new VPWidenPointerInductionRecipe( |
2083 | cast<PHINode>(Val: getUnderlyingInstr()), getOperand(N: 0), getOperand(N: 1), |
2084 | getOperand(N: 2), getInductionDescriptor(), IsScalarAfterVectorization, |
2085 | getDebugLoc()); |
2086 | } |
2087 | |
2088 | VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) |
2089 | |
2090 | /// Generate vector values for the pointer induction. |
2091 | void execute(VPTransformState &State) override; |
2092 | |
2093 | /// Returns true if only scalar values will be generated. |
2094 | bool onlyScalarsGenerated(bool IsScalable); |
2095 | |
2096 | /// Returns the VPValue representing the value of this induction at |
2097 | /// the first unrolled part, if it exists. Returns itself if unrolling did not |
2098 | /// take place. |
2099 | VPValue *getFirstUnrolledPartOperand() { |
2100 | return getUnrollPart(U&: *this) == 0 ? this : getOperand(N: 3); |
2101 | } |
2102 | |
2103 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2104 | /// Print the recipe. |
2105 | void print(raw_ostream &O, const Twine &Indent, |
2106 | VPSlotTracker &SlotTracker) const override; |
2107 | #endif |
2108 | }; |
2109 | |
2110 | /// A recipe for widened phis. Incoming values are operands of the recipe and |
2111 | /// their operand index corresponds to the incoming predecessor block. If the |
2112 | /// recipe is placed in an entry block to a (non-replicate) region, it must have |
2113 | /// exactly 2 incoming values, the first from the predecessor of the region and |
2114 | /// the second from the exiting block of the region. |
2115 | class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors { |
2116 | /// Name to use for the generated IR instruction for the widened phi. |
2117 | std::string Name; |
2118 | |
2119 | protected: |
2120 | const VPRecipeBase *getAsRecipe() const override { return this; } |
2121 | |
2122 | public: |
2123 | /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and |
2124 | /// debug location \p DL. |
2125 | VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, |
2126 | const Twine &Name = "" ) |
2127 | : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL), |
2128 | Name(Name.str()) { |
2129 | if (Start) |
2130 | addOperand(Operand: Start); |
2131 | } |
2132 | |
2133 | VPWidenPHIRecipe *clone() override { |
2134 | auto *C = new VPWidenPHIRecipe(cast<PHINode>(Val: getUnderlyingValue()), |
2135 | getOperand(N: 0), getDebugLoc(), Name); |
2136 | for (VPValue *Op : llvm::drop_begin(RangeOrContainer: operands())) |
2137 | C->addOperand(Operand: Op); |
2138 | return C; |
2139 | } |
2140 | |
2141 | ~VPWidenPHIRecipe() override = default; |
2142 | |
2143 | VP_CLASSOF_IMPL(VPDef::VPWidenPHISC) |
2144 | |
2145 | /// Generate the phi/select nodes. |
2146 | void execute(VPTransformState &State) override; |
2147 | |
2148 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2149 | /// Print the recipe. |
2150 | void print(raw_ostream &O, const Twine &Indent, |
2151 | VPSlotTracker &SlotTracker) const override; |
2152 | #endif |
2153 | }; |
2154 | |
2155 | /// A recipe for handling first-order recurrence phis. The start value is the |
2156 | /// first operand of the recipe and the incoming value from the backedge is the |
2157 | /// second operand. |
2158 | struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { |
2159 | VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) |
2160 | : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {} |
2161 | |
2162 | VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC) |
2163 | |
2164 | VPFirstOrderRecurrencePHIRecipe *clone() override { |
2165 | return new VPFirstOrderRecurrencePHIRecipe( |
2166 | cast<PHINode>(Val: getUnderlyingInstr()), *getOperand(N: 0)); |
2167 | } |
2168 | |
2169 | void execute(VPTransformState &State) override; |
2170 | |
2171 | /// Return the cost of this first-order recurrence phi recipe. |
2172 | InstructionCost computeCost(ElementCount VF, |
2173 | VPCostContext &Ctx) const override; |
2174 | |
2175 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2176 | /// Print the recipe. |
2177 | void print(raw_ostream &O, const Twine &Indent, |
2178 | VPSlotTracker &SlotTracker) const override; |
2179 | #endif |
2180 | |
2181 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2182 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2183 | assert(is_contained(operands(), Op) && |
2184 | "Op must be an operand of the recipe" ); |
2185 | return Op == getStartValue(); |
2186 | } |
2187 | }; |
2188 | |
2189 | /// A recipe for handling reduction phis. The start value is the first operand |
2190 | /// of the recipe and the incoming value from the backedge is the second |
2191 | /// operand. |
2192 | class VPReductionPHIRecipe : public VPHeaderPHIRecipe, |
2193 | public VPUnrollPartAccessor<2> { |
2194 | /// Descriptor for the reduction. |
2195 | const RecurrenceDescriptor &RdxDesc; |
2196 | |
2197 | /// The phi is part of an in-loop reduction. |
2198 | bool IsInLoop; |
2199 | |
2200 | /// The phi is part of an ordered reduction. Requires IsInLoop to be true. |
2201 | bool IsOrdered; |
2202 | |
2203 | /// When expanding the reduction PHI, the plan's VF element count is divided |
2204 | /// by this factor to form the reduction phi's VF. |
2205 | unsigned VFScaleFactor = 1; |
2206 | |
2207 | public: |
2208 | /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p |
2209 | /// RdxDesc. |
2210 | VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, |
2211 | VPValue &Start, bool IsInLoop = false, |
2212 | bool IsOrdered = false, unsigned VFScaleFactor = 1) |
2213 | : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), |
2214 | RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered), |
2215 | VFScaleFactor(VFScaleFactor) { |
2216 | assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop" ); |
2217 | } |
2218 | |
2219 | ~VPReductionPHIRecipe() override = default; |
2220 | |
2221 | VPReductionPHIRecipe *clone() override { |
2222 | auto *R = new VPReductionPHIRecipe( |
2223 | dyn_cast_or_null<PHINode>(Val: getUnderlyingValue()), RdxDesc, |
2224 | *getOperand(N: 0), IsInLoop, IsOrdered, VFScaleFactor); |
2225 | R->addOperand(Operand: getBackedgeValue()); |
2226 | return R; |
2227 | } |
2228 | |
2229 | VP_CLASSOF_IMPL(VPDef::VPReductionPHISC) |
2230 | |
2231 | /// Generate the phi/select nodes. |
2232 | void execute(VPTransformState &State) override; |
2233 | |
2234 | /// Get the factor that the VF of this recipe's output should be scaled by. |
2235 | unsigned getVFScaleFactor() const { return VFScaleFactor; } |
2236 | |
2237 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2238 | /// Print the recipe. |
2239 | void print(raw_ostream &O, const Twine &Indent, |
2240 | VPSlotTracker &SlotTracker) const override; |
2241 | #endif |
2242 | |
2243 | const RecurrenceDescriptor &getRecurrenceDescriptor() const { |
2244 | return RdxDesc; |
2245 | } |
2246 | |
2247 | /// Returns true, if the phi is part of an ordered reduction. |
2248 | bool isOrdered() const { return IsOrdered; } |
2249 | |
2250 | /// Returns true, if the phi is part of an in-loop reduction. |
2251 | bool isInLoop() const { return IsInLoop; } |
2252 | |
2253 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2254 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2255 | assert(is_contained(operands(), Op) && |
2256 | "Op must be an operand of the recipe" ); |
2257 | return isOrdered() || isInLoop(); |
2258 | } |
2259 | }; |
2260 | |
2261 | /// A recipe for vectorizing a phi-node as a sequence of mask-based select |
2262 | /// instructions. |
2263 | class VPBlendRecipe : public VPSingleDefRecipe { |
2264 | public: |
2265 | /// The blend operation is a User of the incoming values and of their |
2266 | /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can |
2267 | /// be omitted (implied by passing an odd number of operands) in which case |
2268 | /// all other incoming values are merged into it. |
2269 | VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) |
2270 | : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) { |
2271 | assert(Operands.size() > 0 && "Expected at least one operand!" ); |
2272 | } |
2273 | |
2274 | VPBlendRecipe *clone() override { |
2275 | SmallVector<VPValue *> Ops(operands()); |
2276 | return new VPBlendRecipe(cast<PHINode>(Val: getUnderlyingValue()), Ops); |
2277 | } |
2278 | |
2279 | VP_CLASSOF_IMPL(VPDef::VPBlendSC) |
2280 | |
2281 | /// A normalized blend is one that has an odd number of operands, whereby the |
2282 | /// first operand does not have an associated mask. |
2283 | bool isNormalized() const { return getNumOperands() % 2; } |
2284 | |
2285 | /// Return the number of incoming values, taking into account when normalized |
2286 | /// the first incoming value will have no mask. |
2287 | unsigned getNumIncomingValues() const { |
2288 | return (getNumOperands() + isNormalized()) / 2; |
2289 | } |
2290 | |
2291 | /// Return incoming value number \p Idx. |
2292 | VPValue *getIncomingValue(unsigned Idx) const { |
2293 | return Idx == 0 ? getOperand(N: 0) : getOperand(N: Idx * 2 - isNormalized()); |
2294 | } |
2295 | |
2296 | /// Return mask number \p Idx. |
2297 | VPValue *getMask(unsigned Idx) const { |
2298 | assert((Idx > 0 || !isNormalized()) && "First index has no mask!" ); |
2299 | return Idx == 0 ? getOperand(N: 1) : getOperand(N: Idx * 2 + !isNormalized()); |
2300 | } |
2301 | |
2302 | /// Generate the phi/select nodes. |
2303 | void execute(VPTransformState &State) override; |
2304 | |
2305 | /// Return the cost of this VPWidenMemoryRecipe. |
2306 | InstructionCost computeCost(ElementCount VF, |
2307 | VPCostContext &Ctx) const override; |
2308 | |
2309 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2310 | /// Print the recipe. |
2311 | void print(raw_ostream &O, const Twine &Indent, |
2312 | VPSlotTracker &SlotTracker) const override; |
2313 | #endif |
2314 | |
2315 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2316 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2317 | assert(is_contained(operands(), Op) && |
2318 | "Op must be an operand of the recipe" ); |
2319 | // Recursing through Blend recipes only, must terminate at header phi's the |
2320 | // latest. |
2321 | return all_of(Range: users(), |
2322 | P: [this](VPUser *U) { return U->onlyFirstLaneUsed(Op: this); }); |
2323 | } |
2324 | }; |
2325 | |
2326 | /// VPInterleaveRecipe is a recipe for transforming an interleave group of load |
2327 | /// or stores into one wide load/store and shuffles. The first operand of a |
2328 | /// VPInterleave recipe is the address, followed by the stored values, followed |
2329 | /// by an optional mask. |
2330 | class VPInterleaveRecipe : public VPRecipeBase { |
2331 | const InterleaveGroup<Instruction> *IG; |
2332 | |
2333 | /// Indicates if the interleave group is in a conditional block and requires a |
2334 | /// mask. |
2335 | bool HasMask = false; |
2336 | |
2337 | /// Indicates if gaps between members of the group need to be masked out or if |
2338 | /// unusued gaps can be loaded speculatively. |
2339 | bool NeedsMaskForGaps = false; |
2340 | |
2341 | public: |
2342 | VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, |
2343 | ArrayRef<VPValue *> StoredValues, VPValue *Mask, |
2344 | bool NeedsMaskForGaps, DebugLoc DL) |
2345 | : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, |
2346 | DL), |
2347 | |
2348 | IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { |
2349 | // TODO: extend the masked interleaved-group support to reversed access. |
2350 | assert((!Mask || !IG->isReverse()) && |
2351 | "Reversed masked interleave-group not supported." ); |
2352 | for (unsigned i = 0; i < IG->getFactor(); ++i) |
2353 | if (Instruction *I = IG->getMember(Index: i)) { |
2354 | if (I->getType()->isVoidTy()) |
2355 | continue; |
2356 | new VPValue(I, this); |
2357 | } |
2358 | |
2359 | for (auto *SV : StoredValues) |
2360 | addOperand(Operand: SV); |
2361 | if (Mask) { |
2362 | HasMask = true; |
2363 | addOperand(Operand: Mask); |
2364 | } |
2365 | } |
2366 | ~VPInterleaveRecipe() override = default; |
2367 | |
2368 | VPInterleaveRecipe *clone() override { |
2369 | return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), |
2370 | NeedsMaskForGaps, getDebugLoc()); |
2371 | } |
2372 | |
2373 | VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) |
2374 | |
2375 | /// Return the address accessed by this recipe. |
2376 | VPValue *getAddr() const { |
2377 | return getOperand(N: 0); // Address is the 1st, mandatory operand. |
2378 | } |
2379 | |
2380 | /// Return the mask used by this recipe. Note that a full mask is represented |
2381 | /// by a nullptr. |
2382 | VPValue *getMask() const { |
2383 | // Mask is optional and therefore the last, currently 2nd operand. |
2384 | return HasMask ? getOperand(N: getNumOperands() - 1) : nullptr; |
2385 | } |
2386 | |
2387 | /// Return the VPValues stored by this interleave group. If it is a load |
2388 | /// interleave group, return an empty ArrayRef. |
2389 | ArrayRef<VPValue *> getStoredValues() const { |
2390 | // The first operand is the address, followed by the stored values, followed |
2391 | // by an optional mask. |
2392 | return ArrayRef<VPValue *>(op_begin(), getNumOperands()) |
2393 | .slice(N: 1, M: getNumStoreOperands()); |
2394 | } |
2395 | |
2396 | /// Generate the wide load or store, and shuffles. |
2397 | void execute(VPTransformState &State) override; |
2398 | |
2399 | /// Return the cost of this VPInterleaveRecipe. |
2400 | InstructionCost computeCost(ElementCount VF, |
2401 | VPCostContext &Ctx) const override; |
2402 | |
2403 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2404 | /// Print the recipe. |
2405 | void print(raw_ostream &O, const Twine &Indent, |
2406 | VPSlotTracker &SlotTracker) const override; |
2407 | #endif |
2408 | |
2409 | const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } |
2410 | |
2411 | /// Returns the number of stored operands of this interleave group. Returns 0 |
2412 | /// for load interleave groups. |
2413 | unsigned getNumStoreOperands() const { |
2414 | return getNumOperands() - (HasMask ? 2 : 1); |
2415 | } |
2416 | |
2417 | /// The recipe only uses the first lane of the address. |
2418 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2419 | assert(is_contained(operands(), Op) && |
2420 | "Op must be an operand of the recipe" ); |
2421 | return Op == getAddr() && !llvm::is_contained(Range: getStoredValues(), Element: Op); |
2422 | } |
2423 | |
2424 | Instruction *getInsertPos() const { return IG->getInsertPos(); } |
2425 | }; |
2426 | |
2427 | /// A recipe to represent inloop reduction operations, performing a reduction on |
2428 | /// a vector operand into a scalar value, and adding the result to a chain. |
2429 | /// The Operands are {ChainOp, VecOp, [Condition]}. |
2430 | class VPReductionRecipe : public VPRecipeWithIRFlags { |
2431 | /// The recurrence kind for the reduction in question. |
2432 | RecurKind RdxKind; |
2433 | bool IsOrdered; |
2434 | /// Whether the reduction is conditional. |
2435 | bool IsConditional = false; |
2436 | |
2437 | protected: |
2438 | VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, |
2439 | FastMathFlags FMFs, Instruction *I, |
2440 | ArrayRef<VPValue *> Operands, VPValue *CondOp, |
2441 | bool IsOrdered, DebugLoc DL) |
2442 | : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind), |
2443 | IsOrdered(IsOrdered) { |
2444 | if (CondOp) { |
2445 | IsConditional = true; |
2446 | addOperand(Operand: CondOp); |
2447 | } |
2448 | setUnderlyingValue(I); |
2449 | } |
2450 | |
2451 | public: |
2452 | VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, |
2453 | VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, |
2454 | bool IsOrdered, DebugLoc DL = {}) |
2455 | : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I, |
2456 | ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, |
2457 | IsOrdered, DL) {} |
2458 | |
2459 | VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, |
2460 | VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, |
2461 | bool IsOrdered, DebugLoc DL = {}) |
2462 | : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr, |
2463 | ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, |
2464 | IsOrdered, DL) {} |
2465 | |
2466 | ~VPReductionRecipe() override = default; |
2467 | |
2468 | VPReductionRecipe *clone() override { |
2469 | return new VPReductionRecipe(RdxKind, getFastMathFlags(), |
2470 | getUnderlyingInstr(), getChainOp(), getVecOp(), |
2471 | getCondOp(), IsOrdered, getDebugLoc()); |
2472 | } |
2473 | |
2474 | static inline bool classof(const VPRecipeBase *R) { |
2475 | return R->getVPDefID() == VPRecipeBase::VPReductionSC || |
2476 | R->getVPDefID() == VPRecipeBase::VPReductionEVLSC; |
2477 | } |
2478 | |
2479 | static inline bool classof(const VPUser *U) { |
2480 | auto *R = dyn_cast<VPRecipeBase>(Val: U); |
2481 | return R && classof(R); |
2482 | } |
2483 | |
2484 | /// Generate the reduction in the loop. |
2485 | void execute(VPTransformState &State) override; |
2486 | |
2487 | /// Return the cost of VPReductionRecipe. |
2488 | InstructionCost computeCost(ElementCount VF, |
2489 | VPCostContext &Ctx) const override; |
2490 | |
2491 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2492 | /// Print the recipe. |
2493 | void print(raw_ostream &O, const Twine &Indent, |
2494 | VPSlotTracker &SlotTracker) const override; |
2495 | #endif |
2496 | |
2497 | /// Return the recurrence kind for the in-loop reduction. |
2498 | RecurKind getRecurrenceKind() const { return RdxKind; } |
2499 | /// Return true if the in-loop reduction is ordered. |
2500 | bool isOrdered() const { return IsOrdered; }; |
2501 | /// Return true if the in-loop reduction is conditional. |
2502 | bool isConditional() const { return IsConditional; }; |
2503 | /// The VPValue of the scalar Chain being accumulated. |
2504 | VPValue *getChainOp() const { return getOperand(N: 0); } |
2505 | /// The VPValue of the vector value to be reduced. |
2506 | VPValue *getVecOp() const { return getOperand(N: 1); } |
2507 | /// The VPValue of the condition for the block. |
2508 | VPValue *getCondOp() const { |
2509 | return isConditional() ? getOperand(N: getNumOperands() - 1) : nullptr; |
2510 | } |
2511 | }; |
2512 | |
2513 | /// A recipe for forming partial reductions. In the loop, an accumulator and |
2514 | /// vector operand are added together and passed to the next iteration as the |
2515 | /// next accumulator. After the loop body, the accumulator is reduced to a |
2516 | /// scalar value. |
2517 | class VPPartialReductionRecipe : public VPReductionRecipe { |
2518 | unsigned Opcode; |
2519 | |
2520 | /// The divisor by which the VF of this recipe's output should be divided |
2521 | /// during execution. |
2522 | unsigned VFScaleFactor; |
2523 | |
2524 | public: |
2525 | VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, |
2526 | VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor) |
2527 | : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond, |
2528 | VFScaleFactor, ReductionInst) {} |
2529 | VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, |
2530 | VPValue *Cond, unsigned ScaleFactor, |
2531 | Instruction *ReductionInst = nullptr) |
2532 | : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add, |
2533 | FastMathFlags(), ReductionInst, |
2534 | ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}), |
2535 | Opcode(Opcode), VFScaleFactor(ScaleFactor) { |
2536 | [[maybe_unused]] auto *AccumulatorRecipe = |
2537 | getChainOp()->getDefiningRecipe(); |
2538 | assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) || |
2539 | isa<VPPartialReductionRecipe>(AccumulatorRecipe)) && |
2540 | "Unexpected operand order for partial reduction recipe" ); |
2541 | } |
2542 | ~VPPartialReductionRecipe() override = default; |
2543 | |
2544 | VPPartialReductionRecipe *clone() override { |
2545 | return new VPPartialReductionRecipe(Opcode, getOperand(N: 0), getOperand(N: 1), |
2546 | getCondOp(), VFScaleFactor, |
2547 | getUnderlyingInstr()); |
2548 | } |
2549 | |
2550 | VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC) |
2551 | |
2552 | /// Generate the reduction in the loop. |
2553 | void execute(VPTransformState &State) override; |
2554 | |
2555 | /// Return the cost of this VPPartialReductionRecipe. |
2556 | InstructionCost computeCost(ElementCount VF, |
2557 | VPCostContext &Ctx) const override; |
2558 | |
2559 | /// Get the binary op's opcode. |
2560 | unsigned getOpcode() const { return Opcode; } |
2561 | |
2562 | /// Get the factor that the VF of this recipe's output should be scaled by. |
2563 | unsigned getVFScaleFactor() const { return VFScaleFactor; } |
2564 | |
2565 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2566 | /// Print the recipe. |
2567 | void print(raw_ostream &O, const Twine &Indent, |
2568 | VPSlotTracker &SlotTracker) const override; |
2569 | #endif |
2570 | }; |
2571 | |
2572 | /// A recipe to represent inloop reduction operations with vector-predication |
2573 | /// intrinsics, performing a reduction on a vector operand with the explicit |
2574 | /// vector length (EVL) into a scalar value, and adding the result to a chain. |
2575 | /// The Operands are {ChainOp, VecOp, EVL, [Condition]}. |
2576 | class VPReductionEVLRecipe : public VPReductionRecipe { |
2577 | public: |
2578 | VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, |
2579 | DebugLoc DL = {}) |
2580 | : VPReductionRecipe( |
2581 | VPDef::VPReductionEVLSC, R.getRecurrenceKind(), |
2582 | R.getFastMathFlags(), |
2583 | cast_or_null<Instruction>(Val: R.getUnderlyingValue()), |
2584 | ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp, |
2585 | R.isOrdered(), DL) {} |
2586 | |
2587 | ~VPReductionEVLRecipe() override = default; |
2588 | |
2589 | VPReductionEVLRecipe *clone() override { |
2590 | llvm_unreachable("cloning not implemented yet" ); |
2591 | } |
2592 | |
2593 | VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC) |
2594 | |
2595 | /// Generate the reduction in the loop |
2596 | void execute(VPTransformState &State) override; |
2597 | |
2598 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2599 | /// Print the recipe. |
2600 | void print(raw_ostream &O, const Twine &Indent, |
2601 | VPSlotTracker &SlotTracker) const override; |
2602 | #endif |
2603 | |
2604 | /// The VPValue of the explicit vector length. |
2605 | VPValue *getEVL() const { return getOperand(N: 2); } |
2606 | |
2607 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2608 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2609 | assert(is_contained(operands(), Op) && |
2610 | "Op must be an operand of the recipe" ); |
2611 | return Op == getEVL(); |
2612 | } |
2613 | }; |
2614 | |
2615 | /// VPReplicateRecipe replicates a given instruction producing multiple scalar |
2616 | /// copies of the original scalar type, one per lane, instead of producing a |
2617 | /// single copy of widened type for all lanes. If the instruction is known to be |
2618 | /// a single scalar, only one copy, per lane zero, will be generated. |
2619 | class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { |
2620 | /// Indicator if only a single replica per lane is needed. |
2621 | bool IsSingleScalar; |
2622 | |
2623 | /// Indicator if the replicas are also predicated. |
2624 | bool IsPredicated; |
2625 | |
2626 | public: |
2627 | VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands, |
2628 | bool IsSingleScalar, VPValue *Mask = nullptr, |
2629 | VPIRMetadata Metadata = {}) |
2630 | : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), |
2631 | VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), |
2632 | IsPredicated(Mask) { |
2633 | if (Mask) |
2634 | addOperand(Operand: Mask); |
2635 | } |
2636 | |
2637 | ~VPReplicateRecipe() override = default; |
2638 | |
2639 | VPReplicateRecipe *clone() override { |
2640 | auto *Copy = |
2641 | new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, |
2642 | isPredicated() ? getMask() : nullptr, *this); |
2643 | Copy->transferFlags(Other&: *this); |
2644 | return Copy; |
2645 | } |
2646 | |
2647 | VP_CLASSOF_IMPL(VPDef::VPReplicateSC) |
2648 | |
2649 | /// Generate replicas of the desired Ingredient. Replicas will be generated |
2650 | /// for all parts and lanes unless a specific part and lane are specified in |
2651 | /// the \p State. |
2652 | void execute(VPTransformState &State) override; |
2653 | |
2654 | /// Return the cost of this VPReplicateRecipe. |
2655 | InstructionCost computeCost(ElementCount VF, |
2656 | VPCostContext &Ctx) const override; |
2657 | |
2658 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2659 | /// Print the recipe. |
2660 | void print(raw_ostream &O, const Twine &Indent, |
2661 | VPSlotTracker &SlotTracker) const override; |
2662 | #endif |
2663 | |
2664 | bool isSingleScalar() const { return IsSingleScalar; } |
2665 | |
2666 | bool isPredicated() const { return IsPredicated; } |
2667 | |
2668 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
2669 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
2670 | assert(is_contained(operands(), Op) && |
2671 | "Op must be an operand of the recipe" ); |
2672 | return isSingleScalar(); |
2673 | } |
2674 | |
2675 | /// Returns true if the recipe uses scalars of operand \p Op. |
2676 | bool usesScalars(const VPValue *Op) const override { |
2677 | assert(is_contained(operands(), Op) && |
2678 | "Op must be an operand of the recipe" ); |
2679 | return true; |
2680 | } |
2681 | |
2682 | /// Returns true if the recipe is used by a widened recipe via an intervening |
2683 | /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed |
2684 | /// in a vector. |
2685 | bool shouldPack() const; |
2686 | |
2687 | /// Return the mask of a predicated VPReplicateRecipe. |
2688 | VPValue *getMask() { |
2689 | assert(isPredicated() && "Trying to get the mask of a unpredicated recipe" ); |
2690 | return getOperand(N: getNumOperands() - 1); |
2691 | } |
2692 | |
2693 | unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); } |
2694 | }; |
2695 | |
2696 | /// A recipe for generating conditional branches on the bits of a mask. |
2697 | class VPBranchOnMaskRecipe : public VPRecipeBase { |
2698 | public: |
2699 | VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL) |
2700 | : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {} |
2701 | |
2702 | VPBranchOnMaskRecipe *clone() override { |
2703 | return new VPBranchOnMaskRecipe(getOperand(N: 0), getDebugLoc()); |
2704 | } |
2705 | |
2706 | VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC) |
2707 | |
2708 | /// Generate the extraction of the appropriate bit from the block mask and the |
2709 | /// conditional branch. |
2710 | void execute(VPTransformState &State) override; |
2711 | |
2712 | /// Return the cost of this VPBranchOnMaskRecipe. |
2713 | InstructionCost computeCost(ElementCount VF, |
2714 | VPCostContext &Ctx) const override; |
2715 | |
2716 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2717 | /// Print the recipe. |
2718 | void print(raw_ostream &O, const Twine &Indent, |
2719 | VPSlotTracker &SlotTracker) const override { |
2720 | O << Indent << "BRANCH-ON-MASK " ; |
2721 | printOperands(O, SlotTracker); |
2722 | } |
2723 | #endif |
2724 | |
2725 | /// Returns true if the recipe uses scalars of operand \p Op. |
2726 | bool usesScalars(const VPValue *Op) const override { |
2727 | assert(is_contained(operands(), Op) && |
2728 | "Op must be an operand of the recipe" ); |
2729 | return true; |
2730 | } |
2731 | }; |
2732 | |
2733 | /// A recipe to combine multiple recipes into a single 'expression' recipe, |
2734 | /// which should be considered a single entity for cost-modeling and transforms. |
2735 | /// The recipe needs to be 'decomposed', i.e. replaced by its individual |
2736 | /// expression recipes, before execute. The individual expression recipes are |
2737 | /// completely disconnected from the def-use graph of other recipes not part of |
2738 | /// the expression. Def-use edges between pairs of expression recipes remain |
2739 | /// intact, whereas every edge between an expression recipe and a recipe outside |
2740 | /// the expression is elevated to connect the non-expression recipe with the |
2741 | /// VPExpressionRecipe itself. |
2742 | class VPExpressionRecipe : public VPSingleDefRecipe { |
2743 | /// Recipes included in this VPExpressionRecipe. |
2744 | SmallVector<VPSingleDefRecipe *> ExpressionRecipes; |
2745 | |
2746 | /// Temporary VPValues used for external operands of the expression, i.e. |
2747 | /// operands not defined by recipes in the expression. |
2748 | SmallVector<VPValue *> LiveInPlaceholders; |
2749 | |
2750 | enum class ExpressionTypes { |
2751 | /// Represents an inloop extended reduction operation, performing a |
2752 | /// reduction on an extended vector operand into a scalar value, and adding |
2753 | /// the result to a chain. |
2754 | ExtendedReduction, |
2755 | /// Represent an inloop multiply-accumulate reduction, multiplying the |
2756 | /// extended vector operands, performing a reduction.add on the result, and |
2757 | /// adding the scalar result to a chain. |
2758 | ExtMulAccReduction, |
2759 | /// Represent an inloop multiply-accumulate reduction, multiplying the |
2760 | /// vector operands, performing a reduction.add on the result, and adding |
2761 | /// the scalar result to a chain. |
2762 | MulAccReduction, |
2763 | }; |
2764 | |
2765 | /// Type of the expression. |
2766 | ExpressionTypes ExpressionType; |
2767 | |
2768 | /// Construct a new VPExpressionRecipe by internalizing recipes in \p |
2769 | /// ExpressionRecipes. External operands (i.e. not defined by another recipe |
2770 | /// in the expression) are replaced by temporary VPValues and the original |
2771 | /// operands are transferred to the VPExpressionRecipe itself. Clone recipes |
2772 | /// as needed (excluding last) to ensure they are only used by other recipes |
2773 | /// in the expression. |
2774 | VPExpressionRecipe(ExpressionTypes ExpressionType, |
2775 | ArrayRef<VPSingleDefRecipe *> ExpressionRecipes); |
2776 | |
2777 | public: |
2778 | VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red) |
2779 | : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {} |
2780 | VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red) |
2781 | : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {} |
2782 | VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, |
2783 | VPWidenRecipe *Mul, VPReductionRecipe *Red) |
2784 | : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction, |
2785 | {Ext0, Ext1, Mul, Red}) {} |
2786 | |
2787 | ~VPExpressionRecipe() override { |
2788 | for (auto *R : reverse(C&: ExpressionRecipes)) |
2789 | delete R; |
2790 | for (VPValue *T : LiveInPlaceholders) |
2791 | delete T; |
2792 | } |
2793 | |
2794 | VP_CLASSOF_IMPL(VPDef::VPExpressionSC) |
2795 | |
2796 | VPExpressionRecipe *clone() override { |
2797 | assert(!ExpressionRecipes.empty() && "empty expressions should be removed" ); |
2798 | SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes; |
2799 | for (auto *R : ExpressionRecipes) |
2800 | NewExpressiondRecipes.push_back(Elt: R->clone()); |
2801 | for (auto *New : NewExpressiondRecipes) { |
2802 | for (const auto &[Idx, Old] : enumerate(First&: ExpressionRecipes)) |
2803 | New->replaceUsesOfWith(From: Old, To: NewExpressiondRecipes[Idx]); |
2804 | // Update placeholder operands in the cloned recipe to use the external |
2805 | // operands, to be internalized when the cloned expression is constructed. |
2806 | for (const auto &[Placeholder, OutsideOp] : |
2807 | zip(t&: LiveInPlaceholders, u: operands())) |
2808 | New->replaceUsesOfWith(From: Placeholder, To: OutsideOp); |
2809 | } |
2810 | return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes); |
2811 | } |
2812 | |
2813 | /// Return the VPValue to use to infer the result type of the recipe. |
2814 | VPValue *getOperandOfResultType() const { |
2815 | unsigned OpIdx = |
2816 | cast<VPReductionRecipe>(Val: ExpressionRecipes.back())->isConditional() ? 2 |
2817 | : 1; |
2818 | return getOperand(N: getNumOperands() - OpIdx); |
2819 | } |
2820 | |
2821 | /// Insert the recipes of the expression back into the VPlan, directly before |
2822 | /// the current recipe. Leaves the expression recipe empty, which must be |
2823 | /// removed before codegen. |
2824 | void decompose(); |
2825 | |
2826 | /// Method for generating code, must not be called as this recipe is abstract. |
2827 | void execute(VPTransformState &State) override { |
2828 | llvm_unreachable("recipe must be removed before execute" ); |
2829 | } |
2830 | |
2831 | InstructionCost computeCost(ElementCount VF, |
2832 | VPCostContext &Ctx) const override; |
2833 | |
2834 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2835 | /// Print the recipe. |
2836 | void print(raw_ostream &O, const Twine &Indent, |
2837 | VPSlotTracker &SlotTracker) const override; |
2838 | #endif |
2839 | |
2840 | /// Returns true if this expression contains recipes that may read from or |
2841 | /// write to memory. |
2842 | bool mayReadOrWriteMemory() const; |
2843 | |
2844 | /// Returns true if this expression contains recipes that may have side |
2845 | /// effects. |
2846 | bool mayHaveSideEffects() const; |
2847 | }; |
2848 | |
2849 | /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when |
2850 | /// control converges back from a Branch-on-Mask. The phi nodes are needed in |
2851 | /// order to merge values that are set under such a branch and feed their uses. |
2852 | /// The phi nodes can be scalar or vector depending on the users of the value. |
2853 | /// This recipe works in concert with VPBranchOnMaskRecipe. |
2854 | class VPPredInstPHIRecipe : public VPSingleDefRecipe { |
2855 | public: |
2856 | /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi |
2857 | /// nodes after merging back from a Branch-on-Mask. |
2858 | VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL) |
2859 | : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {} |
2860 | ~VPPredInstPHIRecipe() override = default; |
2861 | |
2862 | VPPredInstPHIRecipe *clone() override { |
2863 | return new VPPredInstPHIRecipe(getOperand(N: 0), getDebugLoc()); |
2864 | } |
2865 | |
2866 | VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC) |
2867 | |
2868 | /// Generates phi nodes for live-outs (from a replicate region) as needed to |
2869 | /// retain SSA form. |
2870 | void execute(VPTransformState &State) override; |
2871 | |
2872 | /// Return the cost of this VPPredInstPHIRecipe. |
2873 | InstructionCost computeCost(ElementCount VF, |
2874 | VPCostContext &Ctx) const override { |
2875 | // TODO: Compute accurate cost after retiring the legacy cost model. |
2876 | return 0; |
2877 | } |
2878 | |
2879 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2880 | /// Print the recipe. |
2881 | void print(raw_ostream &O, const Twine &Indent, |
2882 | VPSlotTracker &SlotTracker) const override; |
2883 | #endif |
2884 | |
2885 | /// Returns true if the recipe uses scalars of operand \p Op. |
2886 | bool usesScalars(const VPValue *Op) const override { |
2887 | assert(is_contained(operands(), Op) && |
2888 | "Op must be an operand of the recipe" ); |
2889 | return true; |
2890 | } |
2891 | }; |
2892 | |
2893 | /// A common base class for widening memory operations. An optional mask can be |
2894 | /// provided as the last operand. |
2895 | class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata { |
2896 | protected: |
2897 | Instruction &Ingredient; |
2898 | |
2899 | /// Whether the accessed addresses are consecutive. |
2900 | bool Consecutive; |
2901 | |
2902 | /// Whether the consecutive accessed addresses are in reverse order. |
2903 | bool Reverse; |
2904 | |
2905 | /// Whether the memory access is masked. |
2906 | bool IsMasked = false; |
2907 | |
2908 | void setMask(VPValue *Mask) { |
2909 | assert(!IsMasked && "cannot re-set mask" ); |
2910 | if (!Mask) |
2911 | return; |
2912 | addOperand(Operand: Mask); |
2913 | IsMasked = true; |
2914 | } |
2915 | |
2916 | VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, |
2917 | std::initializer_list<VPValue *> Operands, |
2918 | bool Consecutive, bool Reverse, |
2919 | const VPIRMetadata &Metadata, DebugLoc DL) |
2920 | : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I), |
2921 | Consecutive(Consecutive), Reverse(Reverse) { |
2922 | assert((Consecutive || !Reverse) && "Reverse implies consecutive" ); |
2923 | } |
2924 | |
2925 | public: |
2926 | VPWidenMemoryRecipe *clone() override { |
2927 | llvm_unreachable("cloning not supported" ); |
2928 | } |
2929 | |
2930 | static inline bool classof(const VPRecipeBase *R) { |
2931 | return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC || |
2932 | R->getVPDefID() == VPRecipeBase::VPWidenStoreSC || |
2933 | R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC || |
2934 | R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC; |
2935 | } |
2936 | |
2937 | static inline bool classof(const VPUser *U) { |
2938 | auto *R = dyn_cast<VPRecipeBase>(Val: U); |
2939 | return R && classof(R); |
2940 | } |
2941 | |
2942 | /// Return whether the loaded-from / stored-to addresses are consecutive. |
2943 | bool isConsecutive() const { return Consecutive; } |
2944 | |
2945 | /// Return whether the consecutive loaded/stored addresses are in reverse |
2946 | /// order. |
2947 | bool isReverse() const { return Reverse; } |
2948 | |
2949 | /// Return the address accessed by this recipe. |
2950 | VPValue *getAddr() const { return getOperand(N: 0); } |
2951 | |
2952 | /// Returns true if the recipe is masked. |
2953 | bool isMasked() const { return IsMasked; } |
2954 | |
2955 | /// Return the mask used by this recipe. Note that a full mask is represented |
2956 | /// by a nullptr. |
2957 | VPValue *getMask() const { |
2958 | // Mask is optional and therefore the last operand. |
2959 | return isMasked() ? getOperand(N: getNumOperands() - 1) : nullptr; |
2960 | } |
2961 | |
2962 | /// Generate the wide load/store. |
2963 | void execute(VPTransformState &State) override { |
2964 | llvm_unreachable("VPWidenMemoryRecipe should not be instantiated." ); |
2965 | } |
2966 | |
2967 | /// Return the cost of this VPWidenMemoryRecipe. |
2968 | InstructionCost computeCost(ElementCount VF, |
2969 | VPCostContext &Ctx) const override; |
2970 | |
2971 | Instruction &getIngredient() const { return Ingredient; } |
2972 | }; |
2973 | |
2974 | /// A recipe for widening load operations, using the address to load from and an |
2975 | /// optional mask. |
2976 | struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue { |
2977 | VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, |
2978 | bool Consecutive, bool Reverse, |
2979 | const VPIRMetadata &Metadata, DebugLoc DL) |
2980 | : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, |
2981 | Reverse, Metadata, DL), |
2982 | VPValue(this, &Load) { |
2983 | setMask(Mask); |
2984 | } |
2985 | |
2986 | VPWidenLoadRecipe *clone() override { |
2987 | return new VPWidenLoadRecipe(cast<LoadInst>(Val&: Ingredient), getAddr(), |
2988 | getMask(), Consecutive, Reverse, *this, |
2989 | getDebugLoc()); |
2990 | } |
2991 | |
2992 | VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC); |
2993 | |
2994 | /// Generate a wide load or gather. |
2995 | void execute(VPTransformState &State) override; |
2996 | |
2997 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2998 | /// Print the recipe. |
2999 | void print(raw_ostream &O, const Twine &Indent, |
3000 | VPSlotTracker &SlotTracker) const override; |
3001 | #endif |
3002 | |
3003 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3004 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3005 | assert(is_contained(operands(), Op) && |
3006 | "Op must be an operand of the recipe" ); |
3007 | // Widened, consecutive loads operations only demand the first lane of |
3008 | // their address. |
3009 | return Op == getAddr() && isConsecutive(); |
3010 | } |
3011 | }; |
3012 | |
3013 | /// A recipe for widening load operations with vector-predication intrinsics, |
3014 | /// using the address to load from, the explicit vector length and an optional |
3015 | /// mask. |
3016 | struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { |
3017 | VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask) |
3018 | : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(), |
3019 | {L.getAddr(), &EVL}, L.isConsecutive(), |
3020 | L.isReverse(), L, L.getDebugLoc()), |
3021 | VPValue(this, &getIngredient()) { |
3022 | setMask(Mask); |
3023 | } |
3024 | |
3025 | VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC) |
3026 | |
3027 | /// Return the EVL operand. |
3028 | VPValue *getEVL() const { return getOperand(N: 1); } |
3029 | |
3030 | /// Generate the wide load or gather. |
3031 | void execute(VPTransformState &State) override; |
3032 | |
3033 | /// Return the cost of this VPWidenLoadEVLRecipe. |
3034 | InstructionCost computeCost(ElementCount VF, |
3035 | VPCostContext &Ctx) const override; |
3036 | |
3037 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3038 | /// Print the recipe. |
3039 | void print(raw_ostream &O, const Twine &Indent, |
3040 | VPSlotTracker &SlotTracker) const override; |
3041 | #endif |
3042 | |
3043 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3044 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3045 | assert(is_contained(operands(), Op) && |
3046 | "Op must be an operand of the recipe" ); |
3047 | // Widened loads only demand the first lane of EVL and consecutive loads |
3048 | // only demand the first lane of their address. |
3049 | return Op == getEVL() || (Op == getAddr() && isConsecutive()); |
3050 | } |
3051 | }; |
3052 | |
3053 | /// A recipe for widening store operations, using the stored value, the address |
3054 | /// to store to and an optional mask. |
3055 | struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe { |
3056 | VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, |
3057 | VPValue *Mask, bool Consecutive, bool Reverse, |
3058 | const VPIRMetadata &Metadata, DebugLoc DL) |
3059 | : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal}, |
3060 | Consecutive, Reverse, Metadata, DL) { |
3061 | setMask(Mask); |
3062 | } |
3063 | |
3064 | VPWidenStoreRecipe *clone() override { |
3065 | return new VPWidenStoreRecipe(cast<StoreInst>(Val&: Ingredient), getAddr(), |
3066 | getStoredValue(), getMask(), Consecutive, |
3067 | Reverse, *this, getDebugLoc()); |
3068 | } |
3069 | |
3070 | VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC); |
3071 | |
3072 | /// Return the value stored by this recipe. |
3073 | VPValue *getStoredValue() const { return getOperand(N: 1); } |
3074 | |
3075 | /// Generate a wide store or scatter. |
3076 | void execute(VPTransformState &State) override; |
3077 | |
3078 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3079 | /// Print the recipe. |
3080 | void print(raw_ostream &O, const Twine &Indent, |
3081 | VPSlotTracker &SlotTracker) const override; |
3082 | #endif |
3083 | |
3084 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3085 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3086 | assert(is_contained(operands(), Op) && |
3087 | "Op must be an operand of the recipe" ); |
3088 | // Widened, consecutive stores only demand the first lane of their address, |
3089 | // unless the same operand is also stored. |
3090 | return Op == getAddr() && isConsecutive() && Op != getStoredValue(); |
3091 | } |
3092 | }; |
3093 | |
3094 | /// A recipe for widening store operations with vector-predication intrinsics, |
3095 | /// using the value to store, the address to store to, the explicit vector |
3096 | /// length and an optional mask. |
3097 | struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { |
3098 | VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask) |
3099 | : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(), |
3100 | {S.getAddr(), S.getStoredValue(), &EVL}, |
3101 | S.isConsecutive(), S.isReverse(), S, |
3102 | S.getDebugLoc()) { |
3103 | setMask(Mask); |
3104 | } |
3105 | |
3106 | VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC) |
3107 | |
3108 | /// Return the address accessed by this recipe. |
3109 | VPValue *getStoredValue() const { return getOperand(N: 1); } |
3110 | |
3111 | /// Return the EVL operand. |
3112 | VPValue *getEVL() const { return getOperand(N: 2); } |
3113 | |
3114 | /// Generate the wide store or scatter. |
3115 | void execute(VPTransformState &State) override; |
3116 | |
3117 | /// Return the cost of this VPWidenStoreEVLRecipe. |
3118 | InstructionCost computeCost(ElementCount VF, |
3119 | VPCostContext &Ctx) const override; |
3120 | |
3121 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3122 | /// Print the recipe. |
3123 | void print(raw_ostream &O, const Twine &Indent, |
3124 | VPSlotTracker &SlotTracker) const override; |
3125 | #endif |
3126 | |
3127 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3128 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3129 | assert(is_contained(operands(), Op) && |
3130 | "Op must be an operand of the recipe" ); |
3131 | if (Op == getEVL()) { |
3132 | assert(getStoredValue() != Op && "unexpected store of EVL" ); |
3133 | return true; |
3134 | } |
3135 | // Widened, consecutive memory operations only demand the first lane of |
3136 | // their address, unless the same operand is also stored. That latter can |
3137 | // happen with opaque pointers. |
3138 | return Op == getAddr() && isConsecutive() && Op != getStoredValue(); |
3139 | } |
3140 | }; |
3141 | |
3142 | /// Recipe to expand a SCEV expression. |
3143 | class VPExpandSCEVRecipe : public VPSingleDefRecipe { |
3144 | const SCEV *Expr; |
3145 | ScalarEvolution &SE; |
3146 | |
3147 | public: |
3148 | VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE) |
3149 | : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {} |
3150 | |
3151 | ~VPExpandSCEVRecipe() override = default; |
3152 | |
3153 | VPExpandSCEVRecipe *clone() override { |
3154 | return new VPExpandSCEVRecipe(Expr, SE); |
3155 | } |
3156 | |
3157 | VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC) |
3158 | |
3159 | /// Generate a canonical vector induction variable of the vector loop, with |
3160 | void execute(VPTransformState &State) override; |
3161 | |
3162 | /// Return the cost of this VPExpandSCEVRecipe. |
3163 | InstructionCost computeCost(ElementCount VF, |
3164 | VPCostContext &Ctx) const override { |
3165 | // TODO: Compute accurate cost after retiring the legacy cost model. |
3166 | return 0; |
3167 | } |
3168 | |
3169 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3170 | /// Print the recipe. |
3171 | void print(raw_ostream &O, const Twine &Indent, |
3172 | VPSlotTracker &SlotTracker) const override; |
3173 | #endif |
3174 | |
3175 | const SCEV *getSCEV() const { return Expr; } |
3176 | }; |
3177 | |
3178 | /// Canonical scalar induction phi of the vector loop. Starting at the specified |
3179 | /// start value (either 0 or the resume value when vectorizing the epilogue |
3180 | /// loop). VPWidenCanonicalIVRecipe represents the vector version of the |
3181 | /// canonical induction variable. |
3182 | class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { |
3183 | public: |
3184 | VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL) |
3185 | : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {} |
3186 | |
3187 | ~VPCanonicalIVPHIRecipe() override = default; |
3188 | |
3189 | VPCanonicalIVPHIRecipe *clone() override { |
3190 | auto *R = new VPCanonicalIVPHIRecipe(getOperand(N: 0), getDebugLoc()); |
3191 | R->addOperand(Operand: getBackedgeValue()); |
3192 | return R; |
3193 | } |
3194 | |
3195 | VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC) |
3196 | |
3197 | void execute(VPTransformState &State) override { |
3198 | llvm_unreachable("cannot execute this recipe, should be replaced by a " |
3199 | "scalar phi recipe" ); |
3200 | } |
3201 | |
3202 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3203 | /// Print the recipe. |
3204 | void print(raw_ostream &O, const Twine &Indent, |
3205 | VPSlotTracker &SlotTracker) const override; |
3206 | #endif |
3207 | |
3208 | /// Returns the scalar type of the induction. |
3209 | Type *getScalarType() const { |
3210 | return getStartValue()->getLiveInIRValue()->getType(); |
3211 | } |
3212 | |
3213 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3214 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3215 | assert(is_contained(operands(), Op) && |
3216 | "Op must be an operand of the recipe" ); |
3217 | return true; |
3218 | } |
3219 | |
3220 | /// Returns true if the recipe only uses the first part of operand \p Op. |
3221 | bool onlyFirstPartUsed(const VPValue *Op) const override { |
3222 | assert(is_contained(operands(), Op) && |
3223 | "Op must be an operand of the recipe" ); |
3224 | return true; |
3225 | } |
3226 | |
3227 | /// Return the cost of this VPCanonicalIVPHIRecipe. |
3228 | InstructionCost computeCost(ElementCount VF, |
3229 | VPCostContext &Ctx) const override { |
3230 | // For now, match the behavior of the legacy cost model. |
3231 | return 0; |
3232 | } |
3233 | }; |
3234 | |
3235 | /// A recipe for generating the active lane mask for the vector loop that is |
3236 | /// used to predicate the vector operations. |
3237 | /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and |
3238 | /// remove VPActiveLaneMaskPHIRecipe. |
3239 | class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { |
3240 | public: |
3241 | VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL) |
3242 | : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask, |
3243 | DL) {} |
3244 | |
3245 | ~VPActiveLaneMaskPHIRecipe() override = default; |
3246 | |
3247 | VPActiveLaneMaskPHIRecipe *clone() override { |
3248 | auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(N: 0), getDebugLoc()); |
3249 | if (getNumOperands() == 2) |
3250 | R->addOperand(Operand: getOperand(N: 1)); |
3251 | return R; |
3252 | } |
3253 | |
3254 | VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC) |
3255 | |
3256 | /// Generate the active lane mask phi of the vector loop. |
3257 | void execute(VPTransformState &State) override; |
3258 | |
3259 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3260 | /// Print the recipe. |
3261 | void print(raw_ostream &O, const Twine &Indent, |
3262 | VPSlotTracker &SlotTracker) const override; |
3263 | #endif |
3264 | }; |
3265 | |
3266 | /// A recipe for generating the phi node for the current index of elements, |
3267 | /// adjusted in accordance with EVL value. It starts at the start value of the |
3268 | /// canonical induction and gets incremented by EVL in each iteration of the |
3269 | /// vector loop. |
3270 | class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { |
3271 | public: |
3272 | VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL) |
3273 | : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {} |
3274 | |
3275 | ~VPEVLBasedIVPHIRecipe() override = default; |
3276 | |
3277 | VPEVLBasedIVPHIRecipe *clone() override { |
3278 | llvm_unreachable("cloning not implemented yet" ); |
3279 | } |
3280 | |
3281 | VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC) |
3282 | |
3283 | void execute(VPTransformState &State) override { |
3284 | llvm_unreachable("cannot execute this recipe, should be replaced by a " |
3285 | "scalar phi recipe" ); |
3286 | } |
3287 | |
3288 | /// Return the cost of this VPEVLBasedIVPHIRecipe. |
3289 | InstructionCost computeCost(ElementCount VF, |
3290 | VPCostContext &Ctx) const override { |
3291 | // For now, match the behavior of the legacy cost model. |
3292 | return 0; |
3293 | } |
3294 | |
3295 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3296 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3297 | assert(is_contained(operands(), Op) && |
3298 | "Op must be an operand of the recipe" ); |
3299 | return true; |
3300 | } |
3301 | |
3302 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3303 | /// Print the recipe. |
3304 | void print(raw_ostream &O, const Twine &Indent, |
3305 | VPSlotTracker &SlotTracker) const override; |
3306 | #endif |
3307 | }; |
3308 | |
3309 | /// A Recipe for widening the canonical induction variable of the vector loop. |
3310 | class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, |
3311 | public VPUnrollPartAccessor<1> { |
3312 | public: |
3313 | VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) |
3314 | : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} |
3315 | |
3316 | ~VPWidenCanonicalIVRecipe() override = default; |
3317 | |
3318 | VPWidenCanonicalIVRecipe *clone() override { |
3319 | return new VPWidenCanonicalIVRecipe( |
3320 | cast<VPCanonicalIVPHIRecipe>(Val: getOperand(N: 0))); |
3321 | } |
3322 | |
3323 | VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC) |
3324 | |
3325 | /// Generate a canonical vector induction variable of the vector loop, with |
3326 | /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and |
3327 | /// step = <VF*UF, VF*UF, ..., VF*UF>. |
3328 | void execute(VPTransformState &State) override; |
3329 | |
3330 | /// Return the cost of this VPWidenCanonicalIVPHIRecipe. |
3331 | InstructionCost computeCost(ElementCount VF, |
3332 | VPCostContext &Ctx) const override { |
3333 | // TODO: Compute accurate cost after retiring the legacy cost model. |
3334 | return 0; |
3335 | } |
3336 | |
3337 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3338 | /// Print the recipe. |
3339 | void print(raw_ostream &O, const Twine &Indent, |
3340 | VPSlotTracker &SlotTracker) const override; |
3341 | #endif |
3342 | }; |
3343 | |
3344 | /// A recipe for converting the input value \p IV value to the corresponding |
3345 | /// value of an IV with different start and step values, using Start + IV * |
3346 | /// Step. |
3347 | class VPDerivedIVRecipe : public VPSingleDefRecipe { |
3348 | /// Kind of the induction. |
3349 | const InductionDescriptor::InductionKind Kind; |
3350 | /// If not nullptr, the floating point induction binary operator. Must be set |
3351 | /// for floating point inductions. |
3352 | const FPMathOperator *FPBinOp; |
3353 | |
3354 | /// Name to use for the generated IR instruction for the derived IV. |
3355 | std::string Name; |
3356 | |
3357 | public: |
3358 | VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, |
3359 | VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, |
3360 | const Twine &Name = "" ) |
3361 | : VPDerivedIVRecipe( |
3362 | IndDesc.getKind(), |
3363 | dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()), |
3364 | Start, CanonicalIV, Step, Name) {} |
3365 | |
3366 | VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, |
3367 | const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, |
3368 | VPValue *Step, const Twine &Name = "" ) |
3369 | : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), |
3370 | FPBinOp(FPBinOp), Name(Name.str()) {} |
3371 | |
3372 | ~VPDerivedIVRecipe() override = default; |
3373 | |
3374 | VPDerivedIVRecipe *clone() override { |
3375 | return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(N: 1), |
3376 | getStepValue()); |
3377 | } |
3378 | |
3379 | VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC) |
3380 | |
3381 | /// Generate the transformed value of the induction at offset StartValue (1. |
3382 | /// operand) + IV (2. operand) * StepValue (3, operand). |
3383 | void execute(VPTransformState &State) override; |
3384 | |
3385 | /// Return the cost of this VPDerivedIVRecipe. |
3386 | InstructionCost computeCost(ElementCount VF, |
3387 | VPCostContext &Ctx) const override { |
3388 | // TODO: Compute accurate cost after retiring the legacy cost model. |
3389 | return 0; |
3390 | } |
3391 | |
3392 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3393 | /// Print the recipe. |
3394 | void print(raw_ostream &O, const Twine &Indent, |
3395 | VPSlotTracker &SlotTracker) const override; |
3396 | #endif |
3397 | |
3398 | Type *getScalarType() const { |
3399 | return getStartValue()->getLiveInIRValue()->getType(); |
3400 | } |
3401 | |
3402 | VPValue *getStartValue() const { return getOperand(N: 0); } |
3403 | VPValue *getStepValue() const { return getOperand(N: 2); } |
3404 | |
3405 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3406 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3407 | assert(is_contained(operands(), Op) && |
3408 | "Op must be an operand of the recipe" ); |
3409 | return true; |
3410 | } |
3411 | }; |
3412 | |
3413 | /// A recipe for handling phi nodes of integer and floating-point inductions, |
3414 | /// producing their scalar values. |
3415 | class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, |
3416 | public VPUnrollPartAccessor<3> { |
3417 | Instruction::BinaryOps InductionOpcode; |
3418 | |
3419 | public: |
3420 | VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, |
3421 | Instruction::BinaryOps Opcode, FastMathFlags FMFs, |
3422 | DebugLoc DL) |
3423 | : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC, |
3424 | ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL), |
3425 | InductionOpcode(Opcode) {} |
3426 | |
3427 | VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, |
3428 | VPValue *Step, VPValue *VF, DebugLoc DL = {}) |
3429 | : VPScalarIVStepsRecipe( |
3430 | IV, Step, VF, IndDesc.getInductionOpcode(), |
3431 | dyn_cast_or_null<FPMathOperator>(Val: IndDesc.getInductionBinOp()) |
3432 | ? IndDesc.getInductionBinOp()->getFastMathFlags() |
3433 | : FastMathFlags(), |
3434 | DL) {} |
3435 | |
3436 | ~VPScalarIVStepsRecipe() override = default; |
3437 | |
3438 | VPScalarIVStepsRecipe *clone() override { |
3439 | return new VPScalarIVStepsRecipe( |
3440 | getOperand(N: 0), getOperand(N: 1), getOperand(N: 2), InductionOpcode, |
3441 | hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(), |
3442 | getDebugLoc()); |
3443 | } |
3444 | |
3445 | /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that |
3446 | /// this is only accurate after the VPlan has been unrolled. |
3447 | bool isPart0() { return getUnrollPart(U&: *this) == 0; } |
3448 | |
3449 | VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC) |
3450 | |
3451 | /// Generate the scalarized versions of the phi node as needed by their users. |
3452 | void execute(VPTransformState &State) override; |
3453 | |
3454 | /// Return the cost of this VPScalarIVStepsRecipe. |
3455 | InstructionCost computeCost(ElementCount VF, |
3456 | VPCostContext &Ctx) const override { |
3457 | // TODO: Compute accurate cost after retiring the legacy cost model. |
3458 | return 0; |
3459 | } |
3460 | |
3461 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3462 | /// Print the recipe. |
3463 | void print(raw_ostream &O, const Twine &Indent, |
3464 | VPSlotTracker &SlotTracker) const override; |
3465 | #endif |
3466 | |
3467 | VPValue *getStepValue() const { return getOperand(N: 1); } |
3468 | |
3469 | /// Returns true if the recipe only uses the first lane of operand \p Op. |
3470 | bool onlyFirstLaneUsed(const VPValue *Op) const override { |
3471 | assert(is_contained(operands(), Op) && |
3472 | "Op must be an operand of the recipe" ); |
3473 | return true; |
3474 | } |
3475 | }; |
3476 | |
3477 | /// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe |
3478 | /// types implementing VPPhiAccessors. Used by isa<> & co. |
3479 | template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> { |
3480 | static inline bool isPossible(const VPRecipeBase *f) { |
3481 | // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors. |
3482 | return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(Val: f); |
3483 | } |
3484 | }; |
3485 | /// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the |
3486 | /// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co. |
3487 | template <typename SrcTy> |
3488 | struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> { |
3489 | |
3490 | using Self = CastInfo<VPPhiAccessors, SrcTy>; |
3491 | |
3492 | /// doCast is used by cast<>. |
3493 | static inline VPPhiAccessors *doCast(SrcTy R) { |
3494 | return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * { |
3495 | switch (R->getVPDefID()) { |
3496 | case VPDef::VPInstructionSC: |
3497 | return cast<VPPhi>(R); |
3498 | case VPDef::VPIRInstructionSC: |
3499 | return cast<VPIRPhi>(R); |
3500 | case VPDef::VPWidenPHISC: |
3501 | return cast<VPWidenPHIRecipe>(R); |
3502 | default: |
3503 | return cast<VPHeaderPHIRecipe>(R); |
3504 | } |
3505 | }()); |
3506 | } |
3507 | |
3508 | /// doCastIfPossible is used by dyn_cast<>. |
3509 | static inline VPPhiAccessors *doCastIfPossible(SrcTy f) { |
3510 | if (!Self::isPossible(f)) |
3511 | return nullptr; |
3512 | return doCast(R: f); |
3513 | } |
3514 | }; |
3515 | template <> |
3516 | struct CastInfo<VPPhiAccessors, VPRecipeBase *> |
3517 | : CastInfoVPPhiAccessors<VPRecipeBase *> {}; |
3518 | template <> |
3519 | struct CastInfo<VPPhiAccessors, const VPRecipeBase *> |
3520 | : CastInfoVPPhiAccessors<const VPRecipeBase *> {}; |
3521 | |
3522 | /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It |
3523 | /// holds a sequence of zero or more VPRecipe's each representing a sequence of |
3524 | /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. |
3525 | class VPBasicBlock : public VPBlockBase { |
3526 | friend class VPlan; |
3527 | |
3528 | /// Use VPlan::createVPBasicBlock to create VPBasicBlocks. |
3529 | VPBasicBlock(const Twine &Name = "" , VPRecipeBase *Recipe = nullptr) |
3530 | : VPBlockBase(VPBasicBlockSC, Name.str()) { |
3531 | if (Recipe) |
3532 | appendRecipe(Recipe); |
3533 | } |
3534 | |
3535 | public: |
3536 | using RecipeListTy = iplist<VPRecipeBase>; |
3537 | |
3538 | protected: |
3539 | /// The VPRecipes held in the order of output instructions to generate. |
3540 | RecipeListTy Recipes; |
3541 | |
3542 | VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "" ) |
3543 | : VPBlockBase(BlockSC, Name.str()) {} |
3544 | |
3545 | public: |
3546 | ~VPBasicBlock() override { |
3547 | while (!Recipes.empty()) |
3548 | Recipes.pop_back(); |
3549 | } |
3550 | |
3551 | /// Instruction iterators... |
3552 | using iterator = RecipeListTy::iterator; |
3553 | using const_iterator = RecipeListTy::const_iterator; |
3554 | using reverse_iterator = RecipeListTy::reverse_iterator; |
3555 | using const_reverse_iterator = RecipeListTy::const_reverse_iterator; |
3556 | |
3557 | //===--------------------------------------------------------------------===// |
3558 | /// Recipe iterator methods |
3559 | /// |
3560 | inline iterator begin() { return Recipes.begin(); } |
3561 | inline const_iterator begin() const { return Recipes.begin(); } |
3562 | inline iterator end() { return Recipes.end(); } |
3563 | inline const_iterator end() const { return Recipes.end(); } |
3564 | |
3565 | inline reverse_iterator rbegin() { return Recipes.rbegin(); } |
3566 | inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } |
3567 | inline reverse_iterator rend() { return Recipes.rend(); } |
3568 | inline const_reverse_iterator rend() const { return Recipes.rend(); } |
3569 | |
3570 | inline size_t size() const { return Recipes.size(); } |
3571 | inline bool empty() const { return Recipes.empty(); } |
3572 | inline const VPRecipeBase &front() const { return Recipes.front(); } |
3573 | inline VPRecipeBase &front() { return Recipes.front(); } |
3574 | inline const VPRecipeBase &back() const { return Recipes.back(); } |
3575 | inline VPRecipeBase &back() { return Recipes.back(); } |
3576 | |
3577 | /// Returns a reference to the list of recipes. |
3578 | RecipeListTy &getRecipeList() { return Recipes; } |
3579 | |
3580 | /// Returns a pointer to a member of the recipe list. |
3581 | static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { |
3582 | return &VPBasicBlock::Recipes; |
3583 | } |
3584 | |
3585 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
3586 | static inline bool classof(const VPBlockBase *V) { |
3587 | return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC || |
3588 | V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; |
3589 | } |
3590 | |
3591 | void insert(VPRecipeBase *Recipe, iterator InsertPt) { |
3592 | assert(Recipe && "No recipe to append." ); |
3593 | assert(!Recipe->Parent && "Recipe already in VPlan" ); |
3594 | Recipe->Parent = this; |
3595 | Recipes.insert(where: InsertPt, New: Recipe); |
3596 | } |
3597 | |
3598 | /// Augment the existing recipes of a VPBasicBlock with an additional |
3599 | /// \p Recipe as the last recipe. |
3600 | void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, InsertPt: end()); } |
3601 | |
3602 | /// The method which generates the output IR instructions that correspond to |
3603 | /// this VPBasicBlock, thereby "executing" the VPlan. |
3604 | void execute(VPTransformState *State) override; |
3605 | |
3606 | /// Return the cost of this VPBasicBlock. |
3607 | InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; |
3608 | |
3609 | /// Return the position of the first non-phi node recipe in the block. |
3610 | iterator getFirstNonPhi(); |
3611 | |
3612 | /// Returns an iterator range over the PHI-like recipes in the block. |
3613 | iterator_range<iterator> phis() { |
3614 | return make_range(x: begin(), y: getFirstNonPhi()); |
3615 | } |
3616 | |
3617 | /// Split current block at \p SplitAt by inserting a new block between the |
3618 | /// current block and its successors and moving all recipes starting at |
3619 | /// SplitAt to the new block. Returns the new block. |
3620 | VPBasicBlock *splitAt(iterator SplitAt); |
3621 | |
3622 | VPRegionBlock *getEnclosingLoopRegion(); |
3623 | const VPRegionBlock *getEnclosingLoopRegion() const; |
3624 | |
3625 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3626 | /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p |
3627 | /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. |
3628 | /// |
3629 | /// Note that the numbering is applied to the whole VPlan, so printing |
3630 | /// individual blocks is consistent with the whole VPlan printing. |
3631 | void print(raw_ostream &O, const Twine &Indent, |
3632 | VPSlotTracker &SlotTracker) const override; |
3633 | using VPBlockBase::print; // Get the print(raw_stream &O) version. |
3634 | #endif |
3635 | |
3636 | /// If the block has multiple successors, return the branch recipe terminating |
3637 | /// the block. If there are no or only a single successor, return nullptr; |
3638 | VPRecipeBase *getTerminator(); |
3639 | const VPRecipeBase *getTerminator() const; |
3640 | |
3641 | /// Returns true if the block is exiting it's parent region. |
3642 | bool isExiting() const; |
3643 | |
3644 | /// Clone the current block and it's recipes, without updating the operands of |
3645 | /// the cloned recipes. |
3646 | VPBasicBlock *clone() override; |
3647 | |
3648 | /// Returns the predecessor block at index \p Idx with the predecessors as per |
3649 | /// the corresponding plain CFG. If the block is an entry block to a region, |
3650 | /// the first predecessor is the single predecessor of a region, and the |
3651 | /// second predecessor is the exiting block of the region. |
3652 | const VPBasicBlock *getCFGPredecessor(unsigned Idx) const; |
3653 | |
3654 | protected: |
3655 | /// Execute the recipes in the IR basic block \p BB. |
3656 | void executeRecipes(VPTransformState *State, BasicBlock *BB); |
3657 | |
3658 | /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block |
3659 | /// generated for this VPBB. |
3660 | void connectToPredecessors(VPTransformState &State); |
3661 | |
3662 | private: |
3663 | /// Create an IR BasicBlock to hold the output instructions generated by this |
3664 | /// VPBasicBlock, and return it. Update the CFGState accordingly. |
3665 | BasicBlock *createEmptyBasicBlock(VPTransformState &State); |
3666 | }; |
3667 | |
3668 | inline const VPBasicBlock * |
3669 | VPPhiAccessors::getIncomingBlock(unsigned Idx) const { |
3670 | return getAsRecipe()->getParent()->getCFGPredecessor(Idx); |
3671 | } |
3672 | |
3673 | /// A special type of VPBasicBlock that wraps an existing IR basic block. |
3674 | /// Recipes of the block get added before the first non-phi instruction in the |
3675 | /// wrapped block. |
3676 | /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's |
3677 | /// preheader block. |
3678 | class VPIRBasicBlock : public VPBasicBlock { |
3679 | friend class VPlan; |
3680 | |
3681 | BasicBlock *IRBB; |
3682 | |
3683 | /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks. |
3684 | VPIRBasicBlock(BasicBlock *IRBB) |
3685 | : VPBasicBlock(VPIRBasicBlockSC, |
3686 | (Twine("ir-bb<" ) + IRBB->getName() + Twine(">" )).str()), |
3687 | IRBB(IRBB) {} |
3688 | |
3689 | public: |
3690 | ~VPIRBasicBlock() override {} |
3691 | |
3692 | static inline bool classof(const VPBlockBase *V) { |
3693 | return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; |
3694 | } |
3695 | |
3696 | /// The method which generates the output IR instructions that correspond to |
3697 | /// this VPBasicBlock, thereby "executing" the VPlan. |
3698 | void execute(VPTransformState *State) override; |
3699 | |
3700 | VPIRBasicBlock *clone() override; |
3701 | |
3702 | BasicBlock *getIRBasicBlock() const { return IRBB; } |
3703 | }; |
3704 | |
3705 | /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks |
3706 | /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. |
3707 | /// A VPRegionBlock may indicate that its contents are to be replicated several |
3708 | /// times. This is designed to support predicated scalarization, in which a |
3709 | /// scalar if-then code structure needs to be generated VF * UF times. Having |
3710 | /// this replication indicator helps to keep a single model for multiple |
3711 | /// candidate VF's. The actual replication takes place only once the desired VF |
3712 | /// and UF have been determined. |
3713 | class VPRegionBlock : public VPBlockBase { |
3714 | friend class VPlan; |
3715 | |
3716 | /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. |
3717 | VPBlockBase *Entry; |
3718 | |
3719 | /// Hold the Single Exiting block of the SESE region modelled by the |
3720 | /// VPRegionBlock. |
3721 | VPBlockBase *Exiting; |
3722 | |
3723 | /// An indicator whether this region is to generate multiple replicated |
3724 | /// instances of output IR corresponding to its VPBlockBases. |
3725 | bool IsReplicator; |
3726 | |
3727 | /// Use VPlan::createVPRegionBlock to create VPRegionBlocks. |
3728 | VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, |
3729 | const std::string &Name = "" , bool IsReplicator = false) |
3730 | : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), |
3731 | IsReplicator(IsReplicator) { |
3732 | assert(Entry->getPredecessors().empty() && "Entry block has predecessors." ); |
3733 | assert(Exiting->getSuccessors().empty() && "Exit block has successors." ); |
3734 | Entry->setParent(this); |
3735 | Exiting->setParent(this); |
3736 | } |
3737 | VPRegionBlock(const std::string &Name = "" , bool IsReplicator = false) |
3738 | : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), |
3739 | IsReplicator(IsReplicator) {} |
3740 | |
3741 | public: |
3742 | ~VPRegionBlock() override {} |
3743 | |
3744 | /// Method to support type inquiry through isa, cast, and dyn_cast. |
3745 | static inline bool classof(const VPBlockBase *V) { |
3746 | return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; |
3747 | } |
3748 | |
3749 | const VPBlockBase *getEntry() const { return Entry; } |
3750 | VPBlockBase *getEntry() { return Entry; } |
3751 | |
3752 | /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p |
3753 | /// EntryBlock must have no predecessors. |
3754 | void setEntry(VPBlockBase *EntryBlock) { |
3755 | assert(EntryBlock->getPredecessors().empty() && |
3756 | "Entry block cannot have predecessors." ); |
3757 | Entry = EntryBlock; |
3758 | EntryBlock->setParent(this); |
3759 | } |
3760 | |
3761 | const VPBlockBase *getExiting() const { return Exiting; } |
3762 | VPBlockBase *getExiting() { return Exiting; } |
3763 | |
3764 | /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p |
3765 | /// ExitingBlock must have no successors. |
3766 | void setExiting(VPBlockBase *ExitingBlock) { |
3767 | assert(ExitingBlock->getSuccessors().empty() && |
3768 | "Exit block cannot have successors." ); |
3769 | Exiting = ExitingBlock; |
3770 | ExitingBlock->setParent(this); |
3771 | } |
3772 | |
3773 | /// Returns the pre-header VPBasicBlock of the loop region. |
3774 | VPBasicBlock *() { |
3775 | assert(!isReplicator() && "should only get pre-header of loop regions" ); |
3776 | return getSinglePredecessor()->getExitingBasicBlock(); |
3777 | } |
3778 | |
3779 | /// An indicator whether this region is to generate multiple replicated |
3780 | /// instances of output IR corresponding to its VPBlockBases. |
3781 | bool isReplicator() const { return IsReplicator; } |
3782 | |
3783 | /// The method which generates the output IR instructions that correspond to |
3784 | /// this VPRegionBlock, thereby "executing" the VPlan. |
3785 | void execute(VPTransformState *State) override; |
3786 | |
3787 | // Return the cost of this region. |
3788 | InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; |
3789 | |
3790 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
3791 | /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with |
3792 | /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using |
3793 | /// consequtive numbers. |
3794 | /// |
3795 | /// Note that the numbering is applied to the whole VPlan, so printing |
3796 | /// individual regions is consistent with the whole VPlan printing. |
3797 | void print(raw_ostream &O, const Twine &Indent, |
3798 | VPSlotTracker &SlotTracker) const override; |
3799 | using VPBlockBase::print; // Get the print(raw_stream &O) version. |
3800 | #endif |
3801 | |
3802 | /// Clone all blocks in the single-entry single-exit region of the block and |
3803 | /// their recipes without updating the operands of the cloned recipes. |
3804 | VPRegionBlock *clone() override; |
3805 | |
3806 | /// Remove the current region from its VPlan, connecting its predecessor to |
3807 | /// its entry, and its exiting block to its successor. |
3808 | void dissolveToCFGLoop(); |
3809 | }; |
3810 | |
3811 | /// VPlan models a candidate for vectorization, encoding various decisions take |
3812 | /// to produce efficient output IR, including which branches, basic-blocks and |
3813 | /// output IR instructions to generate, and their cost. VPlan holds a |
3814 | /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry |
3815 | /// VPBasicBlock. |
3816 | class VPlan { |
3817 | friend class VPlanPrinter; |
3818 | friend class VPSlotTracker; |
3819 | |
3820 | /// VPBasicBlock corresponding to the original preheader. Used to place |
3821 | /// VPExpandSCEV recipes for expressions used during skeleton creation and the |
3822 | /// rest of VPlan execution. |
3823 | /// When this VPlan is used for the epilogue vector loop, the entry will be |
3824 | /// replaced by a new entry block created during skeleton creation. |
3825 | VPBasicBlock *Entry; |
3826 | |
3827 | /// VPIRBasicBlock wrapping the header of the original scalar loop. |
3828 | VPIRBasicBlock *; |
3829 | |
3830 | /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original |
3831 | /// scalar loop. Note that some exit blocks may be unreachable at the moment, |
3832 | /// e.g. if the scalar epilogue always executes. |
3833 | SmallVector<VPIRBasicBlock *, 2> ExitBlocks; |
3834 | |
3835 | /// Holds the VFs applicable to this VPlan. |
3836 | SmallSetVector<ElementCount, 2> VFs; |
3837 | |
3838 | /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for |
3839 | /// any UF. |
3840 | SmallSetVector<unsigned, 2> UFs; |
3841 | |
3842 | /// Holds the name of the VPlan, for printing. |
3843 | std::string Name; |
3844 | |
3845 | /// Represents the trip count of the original loop, for folding |
3846 | /// the tail. |
3847 | VPValue *TripCount = nullptr; |
3848 | |
3849 | /// Represents the backedge taken count of the original loop, for folding |
3850 | /// the tail. It equals TripCount - 1. |
3851 | VPValue *BackedgeTakenCount = nullptr; |
3852 | |
3853 | /// Represents the vector trip count. |
3854 | VPValue VectorTripCount; |
3855 | |
3856 | /// Represents the vectorization factor of the loop. |
3857 | VPValue VF; |
3858 | |
3859 | /// Represents the loop-invariant VF * UF of the vector loop region. |
3860 | VPValue VFxUF; |
3861 | |
3862 | /// Holds a mapping between Values and their corresponding VPValue inside |
3863 | /// VPlan. |
3864 | Value2VPValueTy Value2VPValue; |
3865 | |
3866 | /// Contains all the external definitions created for this VPlan. External |
3867 | /// definitions are VPValues that hold a pointer to their underlying IR. |
3868 | SmallVector<VPValue *, 16> VPLiveIns; |
3869 | |
3870 | /// Mapping from SCEVs to the VPValues representing their expansions. |
3871 | /// NOTE: This mapping is temporary and will be removed once all users have |
3872 | /// been modeled in VPlan directly. |
3873 | DenseMap<const SCEV *, VPValue *> SCEVToExpansion; |
3874 | |
3875 | /// Blocks allocated and owned by the VPlan. They will be deleted once the |
3876 | /// VPlan is destroyed. |
3877 | SmallVector<VPBlockBase *> CreatedBlocks; |
3878 | |
3879 | /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader |
3880 | /// wrapping the original header of the scalar loop. |
3881 | VPlan(VPBasicBlock *Entry, VPIRBasicBlock *) |
3882 | : Entry(Entry), ScalarHeader(ScalarHeader) { |
3883 | Entry->setPlan(this); |
3884 | assert(ScalarHeader->getNumSuccessors() == 0 && |
3885 | "scalar header must be a leaf node" ); |
3886 | } |
3887 | |
3888 | public: |
3889 | /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the |
3890 | /// original preheader and scalar header of \p L, to be used as entry and |
3891 | /// scalar header blocks of the new VPlan. |
3892 | VPlan(Loop *L); |
3893 | |
3894 | /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock |
3895 | /// wrapping \p ScalarHeaderBB and a trip count of \p TC. |
3896 | VPlan(BasicBlock *, VPValue *TC) { |
3897 | setEntry(createVPBasicBlock(Name: "preheader" )); |
3898 | ScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderBB); |
3899 | TripCount = TC; |
3900 | } |
3901 | |
3902 | ~VPlan(); |
3903 | |
3904 | void setEntry(VPBasicBlock *VPBB) { |
3905 | Entry = VPBB; |
3906 | VPBB->setPlan(this); |
3907 | } |
3908 | |
3909 | /// Prepare the plan for execution, setting up the required live-in values. |
3910 | void prepareToExecute(Value *TripCount, Value *VectorTripCount, |
3911 | VPTransformState &State); |
3912 | |
3913 | /// Generate the IR code for this VPlan. |
3914 | void execute(VPTransformState *State); |
3915 | |
3916 | /// Return the cost of this plan. |
3917 | InstructionCost cost(ElementCount VF, VPCostContext &Ctx); |
3918 | |
3919 | VPBasicBlock *getEntry() { return Entry; } |
3920 | const VPBasicBlock *getEntry() const { return Entry; } |
3921 | |
3922 | /// Returns the preheader of the vector loop region, if one exists, or null |
3923 | /// otherwise. |
3924 | VPBasicBlock *() { |
3925 | VPRegionBlock *VectorRegion = getVectorLoopRegion(); |
3926 | return VectorRegion |
3927 | ? cast<VPBasicBlock>(Val: VectorRegion->getSinglePredecessor()) |
3928 | : nullptr; |
3929 | } |
3930 | |
3931 | /// Returns the VPRegionBlock of the vector loop. |
3932 | VPRegionBlock *getVectorLoopRegion(); |
3933 | const VPRegionBlock *getVectorLoopRegion() const; |
3934 | |
3935 | /// Returns the 'middle' block of the plan, that is the block that selects |
3936 | /// whether to execute the scalar tail loop or the exit block from the loop |
3937 | /// latch. If there is an early exit from the vector loop, the middle block |
3938 | /// conceptully has the early exit block as third successor, split accross 2 |
3939 | /// VPBBs. In that case, the second VPBB selects whether to execute the scalar |
3940 | /// tail loop or the exit bock. If the scalar tail loop or exit block are |
3941 | /// known to always execute, the middle block may branch directly to that |
3942 | /// block. This function cannot be called once the vector loop region has been |
3943 | /// removed. |
3944 | VPBasicBlock *getMiddleBlock() { |
3945 | VPRegionBlock *LoopRegion = getVectorLoopRegion(); |
3946 | assert( |
3947 | LoopRegion && |
3948 | "cannot call the function after vector loop region has been removed" ); |
3949 | auto *RegionSucc = cast<VPBasicBlock>(Val: LoopRegion->getSingleSuccessor()); |
3950 | if (RegionSucc->getSingleSuccessor() || |
3951 | is_contained(Range&: RegionSucc->getSuccessors(), Element: getScalarPreheader())) |
3952 | return RegionSucc; |
3953 | // There is an early exit. The successor of RegionSucc is the middle block. |
3954 | return cast<VPBasicBlock>(Val: RegionSucc->getSuccessors()[1]); |
3955 | } |
3956 | |
3957 | const VPBasicBlock *getMiddleBlock() const { |
3958 | return const_cast<VPlan *>(this)->getMiddleBlock(); |
3959 | } |
3960 | |
3961 | /// Return the VPBasicBlock for the preheader of the scalar loop. |
3962 | VPBasicBlock *() const { |
3963 | return cast<VPBasicBlock>(Val: getScalarHeader()->getSinglePredecessor()); |
3964 | } |
3965 | |
3966 | /// Return the VPIRBasicBlock wrapping the header of the scalar loop. |
3967 | VPIRBasicBlock *() const { return ScalarHeader; } |
3968 | |
3969 | /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of |
3970 | /// the original scalar loop. |
3971 | ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; } |
3972 | |
3973 | /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an |
3974 | /// exit block. |
3975 | VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const; |
3976 | |
3977 | /// Returns true if \p VPBB is an exit block. |
3978 | bool isExitBlock(VPBlockBase *VPBB); |
3979 | |
3980 | /// The trip count of the original loop. |
3981 | VPValue *getTripCount() const { |
3982 | assert(TripCount && "trip count needs to be set before accessing it" ); |
3983 | return TripCount; |
3984 | } |
3985 | |
3986 | /// Set the trip count assuming it is currently null; if it is not - use |
3987 | /// resetTripCount(). |
3988 | void setTripCount(VPValue *NewTripCount) { |
3989 | assert(!TripCount && NewTripCount && "TripCount should not be set yet." ); |
3990 | TripCount = NewTripCount; |
3991 | } |
3992 | |
3993 | /// Resets the trip count for the VPlan. The caller must make sure all uses of |
3994 | /// the original trip count have been replaced. |
3995 | void resetTripCount(VPValue *NewTripCount) { |
3996 | assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 && |
3997 | "TripCount must be set when resetting" ); |
3998 | TripCount = NewTripCount; |
3999 | } |
4000 | |
4001 | /// The backedge taken count of the original loop. |
4002 | VPValue *getOrCreateBackedgeTakenCount() { |
4003 | if (!BackedgeTakenCount) |
4004 | BackedgeTakenCount = new VPValue(); |
4005 | return BackedgeTakenCount; |
4006 | } |
4007 | |
4008 | /// The vector trip count. |
4009 | VPValue &getVectorTripCount() { return VectorTripCount; } |
4010 | |
4011 | /// Returns the VF of the vector loop region. |
4012 | VPValue &getVF() { return VF; }; |
4013 | |
4014 | /// Returns VF * UF of the vector loop region. |
4015 | VPValue &getVFxUF() { return VFxUF; } |
4016 | |
4017 | void addVF(ElementCount VF) { VFs.insert(X: VF); } |
4018 | |
4019 | void setVF(ElementCount VF) { |
4020 | assert(hasVF(VF) && "Cannot set VF not already in plan" ); |
4021 | VFs.clear(); |
4022 | VFs.insert(X: VF); |
4023 | } |
4024 | |
4025 | bool hasVF(ElementCount VF) const { return VFs.count(key: VF); } |
4026 | bool hasScalableVF() const { |
4027 | return any_of(Range: VFs, P: [](ElementCount VF) { return VF.isScalable(); }); |
4028 | } |
4029 | |
4030 | /// Returns an iterator range over all VFs of the plan. |
4031 | iterator_range<SmallSetVector<ElementCount, 2>::iterator> |
4032 | vectorFactors() const { |
4033 | return {VFs.begin(), VFs.end()}; |
4034 | } |
4035 | |
4036 | bool hasScalarVFOnly() const { |
4037 | bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar(); |
4038 | assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) && |
4039 | "Plan with scalar VF should only have a single VF" ); |
4040 | return HasScalarVFOnly; |
4041 | } |
4042 | |
4043 | bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(key: UF); } |
4044 | |
4045 | unsigned getUF() const { |
4046 | assert(UFs.size() == 1 && "Expected a single UF" ); |
4047 | return UFs[0]; |
4048 | } |
4049 | |
4050 | void setUF(unsigned UF) { |
4051 | assert(hasUF(UF) && "Cannot set the UF not already in plan" ); |
4052 | UFs.clear(); |
4053 | UFs.insert(X: UF); |
4054 | } |
4055 | |
4056 | /// Returns true if the VPlan already has been unrolled, i.e. it has a single |
4057 | /// concrete UF. |
4058 | bool isUnrolled() const { return UFs.size() == 1; } |
4059 | |
4060 | /// Return a string with the name of the plan and the applicable VFs and UFs. |
4061 | std::string getName() const; |
4062 | |
4063 | void setName(const Twine &newName) { Name = newName.str(); } |
4064 | |
4065 | /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists |
4066 | /// yet) for \p V. |
4067 | VPValue *getOrAddLiveIn(Value *V) { |
4068 | assert(V && "Trying to get or add the VPValue of a null Value" ); |
4069 | auto [It, Inserted] = Value2VPValue.try_emplace(Key: V); |
4070 | if (Inserted) { |
4071 | VPValue *VPV = new VPValue(V); |
4072 | VPLiveIns.push_back(Elt: VPV); |
4073 | assert(VPV->isLiveIn() && "VPV must be a live-in." ); |
4074 | It->second = VPV; |
4075 | } |
4076 | |
4077 | assert(It->second->isLiveIn() && "Only live-ins should be in mapping" ); |
4078 | return It->second; |
4079 | } |
4080 | |
4081 | /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise. |
4082 | VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(Val: V); } |
4083 | |
4084 | /// Return the list of live-in VPValues available in the VPlan. |
4085 | ArrayRef<VPValue *> getLiveIns() const { |
4086 | assert(all_of(Value2VPValue, |
4087 | [this](const auto &P) { |
4088 | return is_contained(VPLiveIns, P.second); |
4089 | }) && |
4090 | "all VPValues in Value2VPValue must also be in VPLiveIns" ); |
4091 | return VPLiveIns; |
4092 | } |
4093 | |
4094 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
4095 | /// Print the live-ins of this VPlan to \p O. |
4096 | void printLiveIns(raw_ostream &O) const; |
4097 | |
4098 | /// Print this VPlan to \p O. |
4099 | void print(raw_ostream &O) const; |
4100 | |
4101 | /// Print this VPlan in DOT format to \p O. |
4102 | void printDOT(raw_ostream &O) const; |
4103 | |
4104 | /// Dump the plan to stderr (for debugging). |
4105 | LLVM_DUMP_METHOD void dump() const; |
4106 | #endif |
4107 | |
4108 | /// Returns the canonical induction recipe of the vector loop. |
4109 | VPCanonicalIVPHIRecipe *getCanonicalIV() { |
4110 | VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); |
4111 | if (EntryVPBB->empty()) { |
4112 | // VPlan native path. |
4113 | EntryVPBB = cast<VPBasicBlock>(Val: EntryVPBB->getSingleSuccessor()); |
4114 | } |
4115 | return cast<VPCanonicalIVPHIRecipe>(Val: &*EntryVPBB->begin()); |
4116 | } |
4117 | |
4118 | VPValue *getSCEVExpansion(const SCEV *S) const { |
4119 | return SCEVToExpansion.lookup(Val: S); |
4120 | } |
4121 | |
4122 | void addSCEVExpansion(const SCEV *S, VPValue *V) { |
4123 | assert(!SCEVToExpansion.contains(S) && "SCEV already expanded" ); |
4124 | SCEVToExpansion[S] = V; |
4125 | } |
4126 | |
4127 | /// Clone the current VPlan, update all VPValues of the new VPlan and cloned |
4128 | /// recipes to refer to the clones, and return it. |
4129 | VPlan *duplicate(); |
4130 | |
4131 | /// Create a new VPBasicBlock with \p Name and containing \p Recipe if |
4132 | /// present. The returned block is owned by the VPlan and deleted once the |
4133 | /// VPlan is destroyed. |
4134 | VPBasicBlock *createVPBasicBlock(const Twine &Name, |
4135 | VPRecipeBase *Recipe = nullptr) { |
4136 | auto *VPB = new VPBasicBlock(Name, Recipe); |
4137 | CreatedBlocks.push_back(Elt: VPB); |
4138 | return VPB; |
4139 | } |
4140 | |
4141 | /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p |
4142 | /// IsReplicator is true, the region is a replicate region. The returned block |
4143 | /// is owned by the VPlan and deleted once the VPlan is destroyed. |
4144 | VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, |
4145 | const std::string &Name = "" , |
4146 | bool IsReplicator = false) { |
4147 | auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator); |
4148 | CreatedBlocks.push_back(Elt: VPB); |
4149 | return VPB; |
4150 | } |
4151 | |
4152 | /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set |
4153 | /// to nullptr. If \p IsReplicator is true, the region is a replicate region. |
4154 | /// The returned block is owned by the VPlan and deleted once the VPlan is |
4155 | /// destroyed. |
4156 | VPRegionBlock *createVPRegionBlock(const std::string &Name = "" , |
4157 | bool IsReplicator = false) { |
4158 | auto *VPB = new VPRegionBlock(Name, IsReplicator); |
4159 | CreatedBlocks.push_back(Elt: VPB); |
4160 | return VPB; |
4161 | } |
4162 | |
4163 | /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create |
4164 | /// VPIRInstructions wrapping the instructions in t\p IRBB. The returned |
4165 | /// block is owned by the VPlan and deleted once the VPlan is destroyed. |
4166 | VPIRBasicBlock *createEmptyVPIRBasicBlock(BasicBlock *IRBB); |
4167 | |
4168 | /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all |
4169 | /// instructions in \p IRBB, except its terminator which is managed by the |
4170 | /// successors of the block in VPlan. The returned block is owned by the VPlan |
4171 | /// and deleted once the VPlan is destroyed. |
4172 | VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB); |
4173 | |
4174 | /// Returns true if the VPlan is based on a loop with an early exit. That is |
4175 | /// the case if the VPlan has either more than one exit block or a single exit |
4176 | /// block with multiple predecessors (one for the exit via the latch and one |
4177 | /// via the other early exit). |
4178 | bool hasEarlyExit() const { |
4179 | return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1; |
4180 | } |
4181 | |
4182 | /// Returns true if the scalar tail may execute after the vector loop. Note |
4183 | /// that this relies on unneeded branches to the scalar tail loop being |
4184 | /// removed. |
4185 | bool hasScalarTail() const { |
4186 | return !(getScalarPreheader()->getNumPredecessors() == 0 || |
4187 | getScalarPreheader()->getSinglePredecessor() == getEntry()); |
4188 | } |
4189 | }; |
4190 | |
4191 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
4192 | inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { |
4193 | Plan.print(OS); |
4194 | return OS; |
4195 | } |
4196 | #endif |
4197 | |
4198 | } // end namespace llvm |
4199 | |
4200 | #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |
4201 | |