//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file provides utility VPlan to VPlan transformations.
//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Compiler.h"
22#include "llvm/Support/Regex.h"
23
24namespace llvm {
25
26class InductionDescriptor;
27class Instruction;
28class Loop;
29class LoopVersioning;
30class OptimizationRemarkEmitter;
31class PHINode;
32class ScalarEvolution;
33class PredicatedScalarEvolution;
34class TargetLibraryInfo;
35class TargetTransformInfo;
36class VPBuilder;
37class VPRecipeBuilder;
38struct VFRange;
39
40LLVM_ABI_FOR_TEST extern cl::opt<bool> VerifyEachVPlan;
41LLVM_ABI_FOR_TEST extern cl::opt<bool> EnableWideActiveLaneMask;
42
43#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
44LLVM_ABI_FOR_TEST extern cl::opt<bool> VPlanPrintAfterAll;
45LLVM_ABI_FOR_TEST extern cl::list<std::string> VPlanPrintAfterPasses;
46LLVM_ABI_FOR_TEST extern cl::opt<bool> VPlanPrintVectorRegionScope;
47#endif
48
49struct VPlanTransforms {
50 /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
51 /// to the pass. Performs verification/printing after each VPlan pass if
52 /// requested via command line options.
53 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
54 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
55 ArgsTy &&...Args) {
56 scope_exit PostTransformActions{[&]() {
57#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
58 // Make sure to print before verification, so that output is more useful
59 // in case of failures:
60 if (VPlanPrintAfterAll ||
61 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
62 any_of(VPlanPrintAfterPasses, [PassName](StringRef Entry) {
63 return Regex(Entry).match(PassName);
64 }))) {
65 dbgs()
66 << "VPlan for loop in '"
67 << Plan.getScalarHeader()->getIRBasicBlock()->getParent()->getName()
68 << "' after " << PassName << '\n';
69 if (VPlanPrintVectorRegionScope && Plan.getVectorLoopRegion())
70 Plan.getVectorLoopRegion()->print(dbgs());
71 else
72 dbgs() << Plan << '\n';
73 }
74#endif
75 if (VerifyEachVPlan && EnableVerify) {
76 if (!verifyVPlanIsValid(Plan))
77 report_fatal_error(reason: "Broken VPlan found, compilation aborted!");
78 }
79 }};
80
81 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
82 }
83#define RUN_VPLAN_PASS(PASS, ...) \
84 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
85#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
86 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
87
88 /// Create a base VPlan0, serving as the common starting point for all later
89 /// candidates. It consists of an initial plain CFG loop with loop blocks from
90 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
91 /// corresponding to the input IR.
92 ///
93 /// The created loop is wrapped in an initial skeleton to facilitate
94 /// vectorization, consisting of a vector pre-header, an exit block for the
95 /// main vector loop (middle.block) and a new block as preheader of the scalar
96 /// loop (scalar.ph). See below for an illustration. It also adds a canonical
97 /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
98 /// VPValue expression for the original trip count.
99 ///
100 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
101 /// / \ old preheader. Will contain iteration number check and SCEV
102 /// | | expansions.
103 /// | |
104 /// / v
105 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
106 /// | / | added later.
107 /// | / v
108 /// || [ ] <-- vector pre header.
109 /// |/ |
110 /// | v
111 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
112 /// | [ ]_|
113 /// | |
114 /// | v
115 /// | [ ] <--- middle-block with the branch to successors
116 /// | / |
117 /// | / |
118 /// | | v
119 /// \--->[ ] <--- scalar preheader (initial a VPBasicBlock, which will be
120 /// | | replaced later by a VPIRBasicBlock wrapping the scalar
121 /// | | preheader basic block.
122 /// | |
123 /// v <-- edge from middle to exit iff epilogue is not required.
124 /// | [ ] \
125 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
126 /// | | header wrapped in VPIRBasicBlock).
127 /// \ |
128 /// \ v
129 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
130 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
131 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
132 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
133
134 /// Replace VPPhi recipes in \p Plan's header with corresponding
135 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
136 /// fixed-order recurrences. This processes all header phis and creates
137 /// the appropriate widened recipe for each one.
138 static void createHeaderPhiRecipes(
139 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
140 const MapVector<PHINode *, InductionDescriptor> &Inductions,
141 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
142 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
143 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
144
145 /// Create VPReductionRecipes for in-loop reductions. This processes chains
146 /// of operations contributing to in-loop reductions and creates appropriate
147 /// VPReductionRecipe instances.
148 static void createInLoopReductionRecipes(
149 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
150 ElementCount MinVF);
151
152 /// Update \p Plan to account for all early exits. If \p Style is not
153 /// NoUncountableExit, handles uncountable early exits and checks that all
154 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
155 /// found.
156 LLVM_ABI_FOR_TEST static bool
157 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
158 PredicatedScalarEvolution &PSE, DominatorTree &DT,
159 AssumptionCache *AC);
160
161 /// If a check is needed to guard executing the scalar epilogue loop, it will
162 /// be added to the middle block.
163 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
164
165 // Create a check to \p Plan to see if the vector loop should be executed.
166 // If \p CheckBlock is non-null, the compare and branch are placed there;
167 // ExpandSCEV recipes are always placed in Entry.
168 static void addMinimumIterationCheck(
169 VPlan &Plan, ElementCount VF, unsigned UF,
170 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
171 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
172 DebugLoc DL, PredicatedScalarEvolution &PSE,
173 VPBasicBlock *CheckBlock = nullptr);
174
175 /// Add a new check block before the vector preheader to \p Plan to check if
176 /// the main vector loop should be executed (TC >= VF * UF).
177 static void
178 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
179 bool RequiresScalarEpilogue, Loop *OrigLoop,
180 const uint32_t *MinItersBypassWeights,
181 DebugLoc DL, PredicatedScalarEvolution &PSE);
182
183 /// Add a check to \p Plan to see if the epilogue vector loop should be
184 /// executed.
185 static void addMinimumVectorEpilogueIterationCheck(
186 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
187 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
188 unsigned EpilogueLoopStep, ScalarEvolution &SE);
189
190 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
191 /// flat CFG into a hierarchical CFG.
192 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
193
194 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
195 /// VPValue and connect the block to \p Plan, using the VPValue as branch
196 /// condition.
197 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
198 bool AddBranchWeights);
199
200 /// Replaces the VPInstructions in \p Plan with corresponding
201 /// widen recipes. Returns false if any VPInstructions could not be converted
202 /// to a wide recipe if needed.
203 LLVM_ABI_FOR_TEST static bool
204 tryToConvertVPInstructionsToVPRecipes(VPlan &Plan,
205 const TargetLibraryInfo &TLI);
206
207 /// Try to legalize reductions with multiple in-loop uses. Currently only
208 /// strict and non-strict min/max reductions used by FindLastIV reductions are
209 /// supported, corresponding to computing the first and last argmin/argmax,
210 /// respectively. Otherwise return false.
211 static bool handleMultiUseReductions(VPlan &Plan,
212 OptimizationRemarkEmitter *ORE,
213 Loop *TheLoop);
214
215 /// Try to have all users of fixed-order recurrences appear after the recipe
216 /// defining their previous value, by either sinking users or hoisting recipes
217 /// defining their previous value (and its operands). Then introduce
218 /// FirstOrderRecurrenceSplice VPInstructions to combine the value from the
219 /// recurrence phis and previous values.
220 /// \returns true if all users of fixed-order recurrences could be re-arranged
221 /// as needed or false if it is not possible. In the latter case, \p Plan is
222 /// not valid.
223 static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
224
225 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
226 /// try to update the vector loop to exit early if any input is NaN and resume
227 /// executing in the scalar loop to handle the NaNs there. Return false if
228 /// this attempt was unsuccessful.
229 static bool handleMaxMinNumReductions(VPlan &Plan);
230
231 /// Check if \p Plan contains any FindLast reductions. If it does, try to
232 /// update the vector loop to save the appropriate state using selects
233 /// for entire vectors for both the latest mask containing at least one active
234 /// element and the corresponding data vector. Return false if this attempt
235 /// was unsuccessful.
236 static bool handleFindLastReductions(VPlan &Plan);
237
238 /// Clear NSW/NUW flags from reduction instructions if necessary.
239 static void clearReductionWrapFlags(VPlan &Plan);
240
241 /// Explicitly unroll \p Plan by \p UF.
242 static void unrollByUF(VPlan &Plan, unsigned UF);
243
244 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
245 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
246 /// regions are dissolved by replicating their blocks and their recipes \p VF
247 /// times.
248 /// TODO: Also dissolve replicate regions with live outs.
249 static void replicateByVF(VPlan &Plan, ElementCount VF);
250
251 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
252 /// resulting plan to \p BestVF and \p BestUF.
253 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
254 unsigned BestUF,
255 PredicatedScalarEvolution &PSE);
256
257 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
258 /// is known to be <= VF, replacing them with the AVL directly.
259 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
260 PredicatedScalarEvolution &PSE);
261
262 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
263 /// optimizations, dead recipe removal, replicate region optimizations and
264 /// block merging.
265 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
266
267 /// Remove redundant VPBasicBlocks by merging them into their single
268 /// predecessor if the latter has a single successor.
269 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
270
271 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
272 /// region block and remove the mask operand. Optimize the created regions by
273 /// iteratively sinking scalar operands into the region, followed by merging
274 /// regions until no improvements are remaining.
275 static void createAndOptimizeReplicateRegions(VPlan &Plan);
276
277 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
278 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
279 /// UseActiveLaneMaskForControlFlow is true, introduce an
280 /// VPActiveLaneMaskPHIRecipe.
281 static void addActiveLaneMask(VPlan &Plan,
282 bool UseActiveLaneMaskForControlFlow);
283
284 /// Insert truncates and extends for any truncated recipe. Redundant casts
285 /// will be folded later.
286 static void
287 truncateToMinimalBitwidths(VPlan &Plan,
288 const MapVector<Instruction *, uint64_t> &MinBWs);
289
290 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
291 /// possible.
292 static void
293 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
294 const DenseMap<Value *, const SCEV *> &StridesMap);
295
296 /// Drop poison flags from recipes that may generate a poison value that is
297 /// used after vectorization, even when their operands are not poison. Those
298 /// recipes meet the following conditions:
299 /// * Contribute to the address computation of a recipe generating a widen
300 /// memory load/store (VPWidenMemoryInstructionRecipe or
301 /// VPInterleaveRecipe).
302 /// * Such a widen memory load/store has at least one underlying Instruction
303 /// that is in a basic block that needs predication and after vectorization
304 /// the generated instruction won't be predicated.
305 /// Uses \p BlockNeedsPredication to check if a block needs predicating.
306 /// TODO: Replace BlockNeedsPredication callback with retrieving info from
307 /// VPlan directly.
308 static void dropPoisonGeneratingRecipes(
309 VPlan &Plan,
310 const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
311
312 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
313 /// replaces all uses except the canonical IV increment of
314 /// VPCanonicalIVPHIRecipe with a VPCurrentIterationPHIRecipe.
315 /// VPCanonicalIVPHIRecipe is only used to control the loop after
316 /// this transformation.
317 static void
318 addExplicitVectorLength(VPlan &Plan,
319 const std::optional<unsigned> &MaxEVLSafeElements);
320
321 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
322 /// example:
323 ///
324 /// %mask = icmp ult step-vector, EVL
325 /// %load = load %ptr, %mask
326 /// -->
327 /// %load = vp.load %ptr, EVL
328 static void optimizeEVLMasks(VPlan &Plan);
329
330 // For each Interleave Group in \p InterleaveGroups replace the Recipes
331 // widening its memory instructions with a single VPInterleaveRecipe at its
332 // insertion point.
333 static void createInterleaveGroups(
334 VPlan &Plan,
335 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
336 &InterleaveGroups,
337 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed);
338
339 /// Remove dead recipes from \p Plan.
340 static void removeDeadRecipes(VPlan &Plan);
341
342 /// Update \p Plan to account for uncountable early exits by introducing
343 /// appropriate branching logic in the latch that handles early exits and the
344 /// latch exit condition. Multiple exits are handled with a dispatch block
345 /// that determines which exit to take based on lane-by-lane semantics.
346 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
347 VPBasicBlock *LatchVPBB,
348 VPBasicBlock *MiddleVPBB,
349 UncountableExitStyle Style);
350
351 /// Replaces the exit condition from
352 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
353 /// to
354 /// (branch-on-cond eq AVLNext, 0)
355 static void convertEVLExitCond(VPlan &Plan);
356
357 /// Replace loop regions with explicit CFG.
358 static void dissolveLoopRegions(VPlan &Plan);
359
360 /// Expand BranchOnTwoConds instructions into explicit CFG with
361 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
362 static void expandBranchOnTwoConds(VPlan &Plan);
363
364 /// Transform loops with variable-length stepping after region
365 /// dissolution.
366 ///
367 /// Once loop regions are replaced with explicit CFG, loops can step with
368 /// variable vector lengths instead of fixed lengths. This transformation:
369 /// * Makes CurrentIteration-Phi concrete.
370 // * Removes CanonicalIV and increment.
371 static void convertToVariableLengthStep(VPlan &Plan);
372
373 /// Lower abstract recipes to concrete ones, that can be codegen'd.
374 static void convertToConcreteRecipes(VPlan &Plan);
375
376 /// This function converts initial recipes to the abstract recipes and clamps
377 /// \p Range based on cost model for following optimizations and cost
378 /// estimations. The converted abstract recipes will lower to concrete
379 /// recipes before codegen.
380 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
381 VFRange &Range);
382
383 /// Perform instcombine-like simplifications on recipes in \p Plan.
384 static void simplifyRecipes(VPlan &Plan);
385
386 /// Remove BranchOnCond recipes with true or false conditions together with
387 /// removing dead edges to their successors. If \p OnlyLatches is true, only
388 /// process loop latches.
389 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
390
391 /// Perform common-subexpression-elimination on \p Plan.
392 static void cse(VPlan &Plan);
393
394 /// If there's a single exit block, optimize its phi recipes that use exiting
395 /// IV values by feeding them precomputed end values instead, possibly taken
396 /// one step backwards.
397 static void optimizeInductionLiveOutUsers(VPlan &Plan,
398 PredicatedScalarEvolution &PSE,
399 bool FoldTail);
400
401 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
402 static void materializeBroadcasts(VPlan &Plan);
403
404 /// Hoist single-scalar loads with invariant addresses out of the vector loop
405 /// to the preheader, if they are proven not to alias with any stores in the
406 /// plan using noalias metadata.
407 static void hoistInvariantLoads(VPlan &Plan);
408
409 /// Hoist predicated loads from the same address to the loop entry block, if
410 /// they are guaranteed to execute on both paths (i.e., in replicate regions
411 /// with complementary masks P and NOT P).
412 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
413 const Loop *L);
414
415 /// Sink predicated stores to the same address with complementary predicates
416 /// (P and NOT P) to an unconditional store with select recipes for the
417 /// stored values. This eliminates branching overhead when all paths
418 /// unconditionally store to the same location.
419 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
420 const Loop *L);
421
422 // Materialize vector trip counts for constants early if it can simply be
423 // computed as (Original TC / VF * UF) * VF * UF.
424 static void
425 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
426 unsigned BestUF,
427 PredicatedScalarEvolution &PSE);
428
429 /// Materialize vector trip count computations to a set of VPInstructions.
430 /// \p Step is used as the step value for the trip count computation.
431 static void materializeVectorTripCount(VPlan &Plan,
432 VPBasicBlock *VectorPHVPBB,
433 bool TailByMasking,
434 bool RequiresScalarEpilogue,
435 VPValue *Step);
436
437 /// Materialize the backedge-taken count to be computed explicitly using
438 /// VPInstructions.
439 static void materializeBackedgeTakenCount(VPlan &Plan,
440 VPBasicBlock *VectorPH);
441
442 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
443 /// into vectors and Unpack recipes to extract scalars from vectors as
444 /// needed.
445 static void materializePacksAndUnpacks(VPlan &Plan);
446
447 /// Materialize UF, VF and VFxUF to be computed explicitly using
448 /// VPInstructions.
449 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
450 ElementCount VF);
451
452 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
453 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
454 /// value. A mapping from SCEV expressions to their expanded IR value is
455 /// returned.
456 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
457 ScalarEvolution &SE);
458
459 /// Try to find a single VF among \p Plan's VFs for which all interleave
460 /// groups (with known minimum VF elements) can be replaced by wide loads and
461 /// stores processing VF elements, if all transformed interleave groups access
462 /// the full vector width (checked via the maximum vector register width). If
463 /// the transformation can be applied, the original \p Plan will be split in
464 /// 2:
465 /// 1. The original Plan with the single VF containing the optimized recipes
466 /// using wide loads instead of interleave groups.
467 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
468 ///
469 /// This effectively is a very simple form of loop-aware SLP, where we use
470 /// interleave groups to identify candidates.
471 static std::unique_ptr<VPlan>
472 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
473
474 /// Adapts the vector loop region for tail folding by introducing a header
475 /// mask and conditionally executing the content of the region:
476 ///
477 /// Vector loop region before:
478 /// +-------------------------------------------+
479 /// |%iv = ... |
480 /// |... |
481 /// |%iv.next = add %iv, vfxuf |
482 /// |branch-on-count %iv.next, vector-trip-count|
483 /// +-------------------------------------------+
484 ///
485 /// Vector loop region after:
486 /// +-------------------------------------------+
487 /// |%iv = ... |
488 /// |%wide.iv = widen-canonical-iv ... |
489 /// |%header-mask = icmp ule %wide.iv, BTC |
490 /// |branch-on-cond %header-mask |---+
491 /// +-------------------------------------------+ |
492 /// | |
493 /// v |
494 /// +-------------------------------------------+ |
495 /// | ... | |
496 /// +-------------------------------------------+ |
497 /// | |
498 /// v |
499 /// +-------------------------------------------+ |
500 /// |<phis> = phi [..., ...], [poison, header] |
501 /// |%iv.next = add %iv, vfxuf |<--+
502 /// |branch-on-count %iv.next, vector-trip-count|
503 /// +-------------------------------------------+
504 ///
505 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
506 /// last active lane of the header mask.
507 static void foldTailByMasking(VPlan &Plan);
508
509 /// Predicate and linearize the control-flow in the only loop region of
510 /// \p Plan.
511 static void introduceMasksAndLinearize(VPlan &Plan);
512
513 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
514 /// a BranchOnCond recipe.
515 static void
516 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
517 std::optional<unsigned> VScaleForTuning);
518
519 /// Handle users in the exit block for first order reductions in the original
520 /// exit block. The penultimate value of recurrences is fed to their LCSSA phi
521 /// users in the original exit block using the VPIRInstruction wrapping to the
522 /// LCSSA phi.
523 static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
524
525 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
526 /// converting them to FindIV reductions, if their IV range excludes a
527 /// suitable sentinel value. For expressions of IVs, the expression is sunk
528 /// to the middle block.
529 static void optimizeFindIVReductions(VPlan &Plan,
530 PredicatedScalarEvolution &PSE, Loop &L);
531
532 /// Detect and create partial reduction recipes for scaled reductions in
533 /// \p Plan. Must be called after recipe construction. If partial reductions
534 /// are only valid for a subset of VFs in Range, Range.End is updated.
535 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
536 VFRange &Range);
537};
538
539} // namespace llvm
540
541#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
542