1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/Support/CommandLine.h"
20
21namespace llvm {
22
23class InductionDescriptor;
24class Instruction;
25class PHINode;
26class ScalarEvolution;
27class PredicatedScalarEvolution;
28class TargetLibraryInfo;
29class VPBuilder;
30class VPRecipeBuilder;
31struct VFRange;
32
33extern cl::opt<bool> VerifyEachVPlan;
34
35struct VPlanTransforms {
36 /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
37 /// arguments to the transform. Returns the boolean returned by the transform.
38 template <typename... ArgsTy>
39 static bool runPass(bool (*Transform)(VPlan &, ArgsTy...), VPlan &Plan,
40 typename std::remove_reference<ArgsTy>::type &...Args) {
41 bool Res = Transform(Plan, Args...);
42 if (VerifyEachVPlan)
43 verifyVPlanIsValid(Plan);
44 return Res;
45 }
46 /// Helper to run a VPlan transform \p Transform on \p VPlan, forwarding extra
47 /// arguments to the transform.
48 template <typename... ArgsTy>
49 static void runPass(void (*Fn)(VPlan &, ArgsTy...), VPlan &Plan,
50 typename std::remove_reference<ArgsTy>::type &...Args) {
51 Fn(Plan, Args...);
52 if (VerifyEachVPlan)
53 verifyVPlanIsValid(Plan);
54 }
55
56 static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, LoopInfo &LI);
57
58 /// Prepare the plan for vectorization. It will introduce a dedicated
59 /// VPBasicBlock for the vector pre-header as well as a VPBasicBlock as exit
60 /// block of the main vector loop (middle.block). If a check is needed to
61 /// guard executing the scalar epilogue loop, it will be added to the middle
62 /// block, together with VPBasicBlocks for the scalar preheader and exit
63 /// blocks. \p InductionTy is the type of the canonical induction and used for
64 /// related values, like the trip count expression. It also creates a VPValue
65 /// expression for the original trip count.
66 static void prepareForVectorization(VPlan &Plan, Type *InductionTy,
67 PredicatedScalarEvolution &PSE,
68 bool RequiresScalarEpilogueCheck,
69 bool TailFolded, Loop *TheLoop,
70 DebugLoc IVDL, bool HasUncountableExit,
71 VFRange &Range);
72
73 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
74 /// flat CFG into a hierarchical CFG.
75 static void createLoopRegions(VPlan &Plan);
76
77 /// Replaces the VPInstructions in \p Plan with corresponding
78 /// widen recipes. Returns false if any VPInstructions could not be converted
79 /// to a wide recipe if needed.
80 static bool tryToConvertVPInstructionsToVPRecipes(
81 VPlanPtr &Plan,
82 function_ref<const InductionDescriptor *(PHINode *)>
83 GetIntOrFpInductionDescriptor,
84 ScalarEvolution &SE, const TargetLibraryInfo &TLI);
85
86 /// Try to have all users of fixed-order recurrences appear after the recipe
87 /// defining their previous value, by either sinking users or hoisting recipes
88 /// defining their previous value (and its operands). Then introduce
89 /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
90 /// recurrence phis and previous values.
91 /// \returns true if all users of fixed-order recurrences could be re-arranged
92 /// as needed or false if it is not possible. In the latter case, \p Plan is
93 /// not valid.
94 static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
95
96 /// Clear NSW/NUW flags from reduction instructions if necessary.
97 static void clearReductionWrapFlags(VPlan &Plan);
98
99 /// Explicitly unroll \p Plan by \p UF.
100 static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx);
101
102 /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
103 /// with \p VF single-scalar recipes.
104 /// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
105 /// dissolving the latter.
106 static void replicateByVF(VPlan &Plan, ElementCount VF);
107
108 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
109 /// resulting plan to \p BestVF and \p BestUF.
110 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
111 unsigned BestUF,
112 PredicatedScalarEvolution &PSE);
113
114 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
115 /// optimizations, dead recipe removal, replicate region optimizations and
116 /// block merging.
117 static void optimize(VPlan &Plan);
118
119 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
120 /// region block and remove the mask operand. Optimize the created regions by
121 /// iteratively sinking scalar operands into the region, followed by merging
122 /// regions until no improvements are remaining.
123 static void createAndOptimizeReplicateRegions(VPlan &Plan);
124
125 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
126 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
127 /// UseActiveLaneMaskForControlFlow is true, introduce an
128 /// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is
129 /// true, no minimum-iteration runtime check will be created (during skeleton
130 /// creation) and instead it is handled using active-lane-mask. \p
131 /// DataAndControlFlowWithoutRuntimeCheck implies \p
132 /// UseActiveLaneMaskForControlFlow.
133 static void addActiveLaneMask(VPlan &Plan,
134 bool UseActiveLaneMaskForControlFlow,
135 bool DataAndControlFlowWithoutRuntimeCheck);
136
137 /// Insert truncates and extends for any truncated recipe. Redundant casts
138 /// will be folded later.
139 static void
140 truncateToMinimalBitwidths(VPlan &Plan,
141 const MapVector<Instruction *, uint64_t> &MinBWs);
142
143 /// Drop poison flags from recipes that may generate a poison value that is
144 /// used after vectorization, even when their operands are not poison. Those
145 /// recipes meet the following conditions:
146 /// * Contribute to the address computation of a recipe generating a widen
147 /// memory load/store (VPWidenMemoryInstructionRecipe or
148 /// VPInterleaveRecipe).
149 /// * Such a widen memory load/store has at least one underlying Instruction
150 /// that is in a basic block that needs predication and after vectorization
151 /// the generated instruction won't be predicated.
152 /// Uses \p BlockNeedsPredication to check if a block needs predicating.
153 /// TODO: Replace BlockNeedsPredication callback with retrieving info from
154 /// VPlan directly.
155 static void dropPoisonGeneratingRecipes(
156 VPlan &Plan,
157 const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
158
159 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
160 /// replaces all uses except the canonical IV increment of
161 /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.
162 /// VPCanonicalIVPHIRecipe is only used to control the loop after
163 /// this transformation.
164 /// \returns true if the transformation succeeds, or false if it doesn't.
165 static bool
166 tryAddExplicitVectorLength(VPlan &Plan,
167 const std::optional<unsigned> &MaxEVLSafeElements);
168
169 // For each Interleave Group in \p InterleaveGroups replace the Recipes
170 // widening its memory instructions with a single VPInterleaveRecipe at its
171 // insertion point.
172 static void createInterleaveGroups(
173 VPlan &Plan,
174 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
175 &InterleaveGroups,
176 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed);
177
178 /// Remove dead recipes from \p Plan.
179 static void removeDeadRecipes(VPlan &Plan);
180
181 /// Update \p Plan to account for the uncountable early exit from \p
182 /// EarlyExitingVPBB to \p EarlyExitVPBB by
183 /// * updating the condition exiting the loop via the latch to include the
184 /// early exit condition,
185 /// * splitting the original middle block to branch to the early exit block
186 /// conditionally - according to the early exit condition.
187 static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
188 VPBasicBlock *EarlyExitVPBB,
189 VPlan &Plan, VPBasicBlock *HeaderVPBB,
190 VPBasicBlock *LatchVPBB,
191 VFRange &Range);
192
193 /// Replace loop regions with explicit CFG.
194 static void dissolveLoopRegions(VPlan &Plan);
195
196 /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
197 /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
198 static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
199
200 /// This function converts initial recipes to the abstract recipes and clamps
201 /// \p Range based on cost model for following optimizations and cost
202 /// estimations. The converted abstract recipes will lower to concrete
203 /// recipes before codegen.
204 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
205 VFRange &Range);
206
207 /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
208 /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
209 static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
210
211 /// If there's a single exit block, optimize its phi recipes that use exiting
212 /// IV values by feeding them precomputed end values instead, possibly taken
213 /// one step backwards.
214 static void
215 optimizeInductionExitUsers(VPlan &Plan,
216 DenseMap<VPValue *, VPValue *> &EndValues);
217
218 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
219 static void materializeBroadcasts(VPlan &Plan);
220
221 /// Try to convert a plan with interleave groups with VF elements to a plan
222 /// with the interleave groups replaced by wide loads and stores processing VF
223 /// elements, if all transformed interleave groups access the full vector
224 /// width (checked via \o VectorRegWidth). This effectively is a very simple
225 /// form of loop-aware SLP, where we use interleave groups to identify
226 /// candidates.
227 static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
228 unsigned VectorRegWidth);
229
230 /// Predicate and linearize the control-flow in the only loop region of
231 /// \p Plan. If \p FoldTail is true, create a mask guarding the loop
232 /// header, otherwise use all-true for the header mask. Masks for blocks are
233 /// added to a block-to-mask map which is returned in order to be used later
234 /// for wide recipe construction. This argument is temporary and will be
235 /// removed in the future.
236 static DenseMap<VPBasicBlock *, VPValue *>
237 introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
238
239 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
240 /// a BranchOnCond recipe.
241 static void
242 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
243 std::optional<unsigned> VScaleForTuning);
244};
245
246} // namespace llvm
247
248#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
249