1 | //===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file provides utility VPlan to VPlan transformations. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H |
14 | #define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H |
15 | |
16 | #include "VPlan.h" |
17 | #include "VPlanVerifier.h" |
18 | #include "llvm/ADT/STLFunctionalExtras.h" |
19 | #include "llvm/Support/CommandLine.h" |
20 | |
21 | namespace llvm { |
22 | |
23 | class InductionDescriptor; |
24 | class Instruction; |
25 | class PHINode; |
26 | class ScalarEvolution; |
27 | class PredicatedScalarEvolution; |
28 | class TargetLibraryInfo; |
29 | class VPBuilder; |
30 | class VPRecipeBuilder; |
31 | struct VFRange; |
32 | |
33 | extern cl::opt<bool> VerifyEachVPlan; |
34 | |
35 | struct VPlanTransforms { |
36 | /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra |
37 | /// arguments to the transform. Returns the boolean returned by the transform. |
38 | template <typename... ArgsTy> |
39 | static bool runPass(bool (*Transform)(VPlan &, ArgsTy...), VPlan &Plan, |
40 | typename std::remove_reference<ArgsTy>::type &...Args) { |
41 | bool Res = Transform(Plan, Args...); |
42 | if (VerifyEachVPlan) |
43 | verifyVPlanIsValid(Plan); |
44 | return Res; |
45 | } |
46 | /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra |
47 | /// arguments to the transform. |
48 | template <typename... ArgsTy> |
49 | static void runPass(void (*Fn)(VPlan &, ArgsTy...), VPlan &Plan, |
50 | typename std::remove_reference<ArgsTy>::type &...Args) { |
51 | Fn(Plan, Args...); |
52 | if (VerifyEachVPlan) |
53 | verifyVPlanIsValid(Plan); |
54 | } |
55 | |
56 | static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, LoopInfo &LI); |
57 | |
58 | /// Prepare the plan for vectorization. It will introduce a dedicated |
59 | /// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit |
60 | /// block of the main vector loop (middle.block). If a check is needed to |
61 | /// guard executing the scalar epilogue loop, it will be added to the middle |
62 | /// block, together with VPBasicBlocks for the scalar preheader and exit |
63 | /// blocks. \p InductionTy is the type of the canonical induction and used for |
64 | /// related values, like the trip count expression. It also creates a VPValue |
65 | /// expression for the original trip count. |
66 | static void prepareForVectorization(VPlan &Plan, Type *InductionTy, |
67 | PredicatedScalarEvolution &PSE, |
68 | bool RequiresScalarEpilogueCheck, |
69 | bool TailFolded, Loop *TheLoop, |
70 | DebugLoc IVDL, bool HasUncountableExit, |
71 | VFRange &Range); |
72 | |
73 | /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's |
74 | /// flat CFG into a hierarchical CFG. |
75 | static void createLoopRegions(VPlan &Plan); |
76 | |
77 | /// Replaces the VPInstructions in \p Plan with corresponding |
78 | /// widen recipes. Returns false if any VPInstructions could not be converted |
79 | /// to a wide recipe if needed. |
80 | static bool tryToConvertVPInstructionsToVPRecipes( |
81 | VPlanPtr &Plan, |
82 | function_ref<const InductionDescriptor *(PHINode *)> |
83 | GetIntOrFpInductionDescriptor, |
84 | ScalarEvolution &SE, const TargetLibraryInfo &TLI); |
85 | |
86 | /// Try to have all users of fixed-order recurrences appear after the recipe |
87 | /// defining their previous value, by either sinking users or hoisting recipes |
88 | /// defining their previous value (and its operands). Then introduce |
89 | /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the |
90 | /// recurrence phis and previous values. |
91 | /// \returns true if all users of fixed-order recurrences could be re-arranged |
92 | /// as needed or false if it is not possible. In the latter case, \p Plan is |
93 | /// not valid. |
94 | static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); |
95 | |
96 | /// Clear NSW/NUW flags from reduction instructions if necessary. |
97 | static void clearReductionWrapFlags(VPlan &Plan); |
98 | |
99 | /// Explicitly unroll \p Plan by \p UF. |
100 | static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx); |
101 | |
102 | /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan |
103 | /// with \p VF single-scalar recipes. |
104 | /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby |
105 | /// dissolving the latter. |
106 | static void replicateByVF(VPlan &Plan, ElementCount VF); |
107 | |
108 | /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the |
109 | /// resulting plan to \p BestVF and \p BestUF. |
110 | static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, |
111 | unsigned BestUF, |
112 | PredicatedScalarEvolution &PSE); |
113 | |
114 | /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe |
115 | /// optimizations, dead recipe removal, replicate region optimizations and |
116 | /// block merging. |
117 | static void optimize(VPlan &Plan); |
118 | |
119 | /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then |
120 | /// region block and remove the mask operand. Optimize the created regions by |
121 | /// iteratively sinking scalar operands into the region, followed by merging |
122 | /// regions until no improvements are remaining. |
123 | static void createAndOptimizeReplicateRegions(VPlan &Plan); |
124 | |
125 | /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an |
126 | /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p |
127 | /// UseActiveLaneMaskForControlFlow is true, introduce an |
128 | /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is |
129 | /// true, no minimum-iteration runtime check will be created (during skeleton |
130 | /// creation) and instead it is handled using active-lane-mask. \p |
131 | /// DataAndControlFlowWithoutRuntimeCheck implies \p |
132 | /// UseActiveLaneMaskForControlFlow. |
133 | static void addActiveLaneMask(VPlan &Plan, |
134 | bool UseActiveLaneMaskForControlFlow, |
135 | bool DataAndControlFlowWithoutRuntimeCheck); |
136 | |
137 | /// Insert truncates and extends for any truncated recipe. Redundant casts |
138 | /// will be folded later. |
139 | static void |
140 | truncateToMinimalBitwidths(VPlan &Plan, |
141 | const MapVector<Instruction *, uint64_t> &MinBWs); |
142 | |
143 | /// Drop poison flags from recipes that may generate a poison value that is |
144 | /// used after vectorization, even when their operands are not poison. Those |
145 | /// recipes meet the following conditions: |
146 | /// * Contribute to the address computation of a recipe generating a widen |
147 | /// memory load/store (VPWidenMemoryInstructionRecipe or |
148 | /// VPInterleaveRecipe). |
149 | /// * Such a widen memory load/store has at least one underlying Instruction |
150 | /// that is in a basic block that needs predication and after vectorization |
151 | /// the generated instruction won't be predicated. |
152 | /// Uses \p BlockNeedsPredication to check if a block needs predicating. |
153 | /// TODO: Replace BlockNeedsPredication callback with retrieving info from |
154 | /// VPlan directly. |
155 | static void dropPoisonGeneratingRecipes( |
156 | VPlan &Plan, |
157 | const std::function<bool(BasicBlock *)> &BlockNeedsPredication); |
158 | |
159 | /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and |
160 | /// replaces all uses except the canonical IV increment of |
161 | /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. |
162 | /// VPCanonicalIVPHIRecipe is only used to control the loop after |
163 | /// this transformation. |
164 | /// \returns true if the transformation succeeds, or false if it doesn't. |
165 | static bool |
166 | tryAddExplicitVectorLength(VPlan &Plan, |
167 | const std::optional<unsigned> &MaxEVLSafeElements); |
168 | |
169 | // For each Interleave Group in \p InterleaveGroups replace the Recipes |
170 | // widening its memory instructions with a single VPInterleaveRecipe at its |
171 | // insertion point. |
172 | static void createInterleaveGroups( |
173 | VPlan &Plan, |
174 | const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> |
175 | &InterleaveGroups, |
176 | VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed); |
177 | |
178 | /// Remove dead recipes from \p Plan. |
179 | static void removeDeadRecipes(VPlan &Plan); |
180 | |
181 | /// Update \p Plan to account for the uncountable early exit from \p |
182 | /// EarlyExitingVPBB to \p EarlyExitVPBB by |
183 | /// * updating the condition exiting the loop via the latch to include the |
184 | /// early exit condition, |
185 | /// * splitting the original middle block to branch to the early exit block |
186 | /// conditionally - according to the early exit condition. |
187 | static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, |
188 | VPBasicBlock *EarlyExitVPBB, |
189 | VPlan &Plan, VPBasicBlock *, |
190 | VPBasicBlock *LatchVPBB, |
191 | VFRange &Range); |
192 | |
193 | /// Replace loop regions with explicit CFG. |
194 | static void dissolveLoopRegions(VPlan &Plan); |
195 | |
196 | /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p |
197 | /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. |
198 | static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy); |
199 | |
200 | /// This function converts initial recipes to the abstract recipes and clamps |
201 | /// \p Range based on cost model for following optimizations and cost |
202 | /// estimations. The converted abstract recipes will lower to concrete |
203 | /// recipes before codegen. |
204 | static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, |
205 | VFRange &Range); |
206 | |
207 | /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p |
208 | /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis. |
209 | static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy); |
210 | |
211 | /// If there's a single exit block, optimize its phi recipes that use exiting |
212 | /// IV values by feeding them precomputed end values instead, possibly taken |
213 | /// one step backwards. |
214 | static void |
215 | optimizeInductionExitUsers(VPlan &Plan, |
216 | DenseMap<VPValue *, VPValue *> &EndValues); |
217 | |
218 | /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors. |
219 | static void materializeBroadcasts(VPlan &Plan); |
220 | |
221 | /// Try to convert a plan with interleave groups with VF elements to a plan |
222 | /// with the interleave groups replaced by wide loads and stores processing VF |
223 | /// elements, if all transformed interleave groups access the full vector |
224 | /// width (checked via \o VectorRegWidth). This effectively is a very simple |
225 | /// form of loop-aware SLP, where we use interleave groups to identify |
226 | /// candidates. |
227 | static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, |
228 | unsigned VectorRegWidth); |
229 | |
230 | /// Predicate and linearize the control-flow in the only loop region of |
231 | /// \p Plan. If \p FoldTail is true, create a mask guarding the loop |
232 | /// header, otherwise use all-true for the header mask. Masks for blocks are |
233 | /// added to a block-to-mask map which is returned in order to be used later |
234 | /// for wide recipe construction. This argument is temporary and will be |
235 | /// removed in the future. |
236 | static DenseMap<VPBasicBlock *, VPValue *> |
237 | introduceMasksAndLinearize(VPlan &Plan, bool FoldTail); |
238 | |
239 | /// Add branch weight metadata, if the \p Plan's middle block is terminated by |
240 | /// a BranchOnCond recipe. |
241 | static void |
242 | addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, |
243 | std::optional<unsigned> VScaleForTuning); |
244 | }; |
245 | |
246 | } // namespace llvm |
247 | |
248 | #endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H |
249 | |