1//===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
10#define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
11
12#include "LoopVectorizationPlanner.h"
13#include "VPlan.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/Analysis/ScalarEvolutionExpressions.h"
16
17namespace llvm {
18
19class LoopVectorizationLegality;
20class LoopVectorizationCostModel;
21class TargetLibraryInfo;
22class TargetTransformInfo;
23struct HistogramInfo;
24struct VFRange;
25
26/// A chain of instructions that form a partial reduction.
27/// Designed to match either:
28/// reduction_bin_op (extend (A), accumulator), or
29/// reduction_bin_op (bin_op (extend (A), (extend (B))), accumulator).
30struct PartialReductionChain {
31 PartialReductionChain(Instruction *Reduction, Instruction *ExtendA,
32 Instruction *ExtendB, Instruction *ExtendUser)
33 : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB),
34 ExtendUser(ExtendUser) {}
35 /// The top-level binary operation that forms the reduction to a scalar
36 /// after the loop body.
37 Instruction *Reduction;
38 /// The extension of each of the inner binary operation's operands.
39 Instruction *ExtendA;
40 Instruction *ExtendB;
41
42 /// The user of the extends that is then reduced.
43 Instruction *ExtendUser;
44};
45
46/// Helper class to create VPRecipies from IR instructions.
47class VPRecipeBuilder {
48 /// The VPlan new recipes are added to.
49 VPlan &Plan;
50
51 /// The loop that we evaluate.
52 Loop *OrigLoop;
53
54 /// Target Library Info.
55 const TargetLibraryInfo *TLI;
56
57 // Target Transform Info.
58 const TargetTransformInfo *TTI;
59
60 /// The legality analysis.
61 LoopVectorizationLegality *Legal;
62
63 /// The profitablity analysis.
64 LoopVectorizationCostModel &CM;
65
66 PredicatedScalarEvolution &PSE;
67
68 VPBuilder &Builder;
69
70 /// The mask of each VPBB, generated earlier and used for predicating recipes
71 /// in VPBB.
72 /// TODO: remove by applying predication when generating the masks.
73 DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74
75 // VPlan construction support: Hold a mapping from ingredients to
76 // their recipe.
77 DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
78
79 /// Cross-iteration reduction & first-order recurrence phis for which we need
80 /// to add the incoming value from the backedge after all recipes have been
81 /// created.
82 SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix;
83
84 /// A mapping of partial reduction exit instructions to their scaling factor.
85 DenseMap<const Instruction *, unsigned> ScaledReductionMap;
86
87 /// Loop versioning instance for getting noalias metadata guaranteed by
88 /// runtime checks.
89 LoopVersioning *LVer;
90
91 /// Check if \p I can be widened at the start of \p Range and possibly
92 /// decrease the range such that the returned value holds for the entire \p
93 /// Range. The function should not be called for memory instructions or calls.
94 bool shouldWiden(Instruction *I, VFRange &Range) const;
95
96 /// Check if the load or store instruction \p I should widened for \p
97 /// Range.Start and potentially masked. Such instructions are handled by a
98 /// recipe that takes an additional VPInstruction for the mask.
99 VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
100 ArrayRef<VPValue *> Operands,
101 VFRange &Range);
102
103 /// Check if an induction recipe should be constructed for \p Phi. If so build
104 /// and return it. If not, return null.
105 VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
106 ArrayRef<VPValue *> Operands,
107 VFRange &Range);
108
109 /// Optimize the special case where the operand of \p I is a constant integer
110 /// induction variable.
111 VPWidenIntOrFpInductionRecipe *
112 tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
113 VFRange &Range);
114
115 /// Handle call instructions. If \p CI can be widened for \p Range.Start,
116 /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
117 /// decreased to ensure same decision from \p Range.Start to \p Range.End.
118 VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
119 VFRange &Range);
120
121 /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
122 /// if it can. The function should only be called if the cost-model indicates
123 /// that widening should be performed.
124 VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands);
125
126 /// Makes Histogram count operations safe for vectorization, by emitting a
127 /// llvm.experimental.vector.histogram.add intrinsic in place of the
128 /// Load + Add|Sub + Store operations that perform the histogram in the
129 /// original scalar loop.
130 VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI,
131 ArrayRef<VPValue *> Operands);
132
133 /// Examines reduction operations to see if the target can use a cheaper
134 /// operation with a wider per-iteration input VF and narrower PHI VF.
135 /// Each element within Chains is a pair with a struct containing reduction
136 /// information and the scaling factor between the number of elements in
137 /// the input and output.
138 /// Recursively calls itself to identify chained scaled reductions.
139 /// Returns true if this invocation added an entry to Chains, otherwise false.
140 /// i.e. returns false in the case that a subcall adds an entry to Chains,
141 /// but the top-level call does not.
142 bool getScaledReductions(
143 Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range,
144 SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains);
145
146public:
147 VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI,
148 const TargetTransformInfo *TTI,
149 LoopVectorizationLegality *Legal,
150 LoopVectorizationCostModel &CM,
151 PredicatedScalarEvolution &PSE, VPBuilder &Builder,
152 DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
153 LoopVersioning *LVer)
154 : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
155 CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
156 LVer(LVer) {}
157
158 std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
159 auto It = ScaledReductionMap.find(Val: ExitInst);
160 return It == ScaledReductionMap.end() ? std::nullopt
161 : std::make_optional(t&: It->second);
162 }
163
164 /// Find all possible partial reductions in the loop and track all of those
165 /// that are valid so recipes can be formed later.
166 void collectScaledReductions(VFRange &Range);
167
168 /// Create and return a widened recipe for \p R if one can be created within
169 /// the given VF \p Range.
170 VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);
171
172 /// Create and return a partial reduction recipe for a reduction instruction
173 /// along with binary operation and reduction phi operands.
174 VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction,
175 ArrayRef<VPValue *> Operands,
176 unsigned ScaleFactor);
177
178 /// Set the recipe created for given ingredient.
179 void setRecipe(Instruction *I, VPRecipeBase *R) {
180 assert(!Ingredient2Recipe.contains(I) &&
181 "Cannot reset recipe for instruction.");
182 Ingredient2Recipe[I] = R;
183 }
184
185 /// Returns the *entry* mask for block \p VPBB or null if the mask is
186 /// all-true.
187 VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
188 return BlockMaskCache.lookup(Val: VPBB);
189 }
190
191 /// Return the recipe created for given ingredient.
192 VPRecipeBase *getRecipe(Instruction *I) {
193 assert(Ingredient2Recipe.count(I) &&
194 "Recording this ingredients recipe was not requested");
195 assert(Ingredient2Recipe[I] != nullptr &&
196 "Ingredient doesn't have a recipe");
197 return Ingredient2Recipe[I];
198 }
199
200 /// Build a VPReplicationRecipe for \p I using \p Operands. If it is
201 /// predicated, add the mask as last operand. Range.End may be decreased to
202 /// ensure same recipe behavior from \p Range.Start to \p Range.End.
203 VPReplicateRecipe *handleReplication(Instruction *I,
204 ArrayRef<VPValue *> Operands,
205 VFRange &Range);
206
207 VPValue *getVPValueOrAddLiveIn(Value *V) {
208 if (auto *I = dyn_cast<Instruction>(Val: V)) {
209 if (auto *R = Ingredient2Recipe.lookup(Val: I))
210 return R->getVPSingleValue();
211 }
212 return Plan.getOrAddLiveIn(V);
213 }
214
215 void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
216 for (auto &[_, V] : BlockMaskCache) {
217 if (auto *New = Old2New.lookup(Val: V)) {
218 V->replaceAllUsesWith(New);
219 V = New;
220 }
221 }
222 }
223};
224} // end namespace llvm
225
226#endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H
227