| 1 | //===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H |
| 10 | #define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H |
| 11 | |
| 12 | #include "LoopVectorizationPlanner.h" |
| 13 | #include "VPlan.h" |
| 14 | #include "llvm/ADT/DenseMap.h" |
| 15 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 16 | |
| 17 | namespace llvm { |
| 18 | |
| 19 | class LoopVectorizationLegality; |
| 20 | class LoopVectorizationCostModel; |
| 21 | class TargetLibraryInfo; |
| 22 | class TargetTransformInfo; |
| 23 | struct HistogramInfo; |
| 24 | struct VFRange; |
| 25 | |
| 26 | /// A chain of instructions that form a partial reduction. |
| 27 | /// Designed to match either: |
| 28 | /// reduction_bin_op (extend (A), accumulator), or |
| 29 | /// reduction_bin_op (bin_op (extend (A), (extend (B))), accumulator). |
| 30 | struct PartialReductionChain { |
| 31 | PartialReductionChain(Instruction *Reduction, Instruction *ExtendA, |
| 32 | Instruction *ExtendB, Instruction *ExtendUser) |
| 33 | : Reduction(Reduction), ExtendA(ExtendA), ExtendB(ExtendB), |
| 34 | ExtendUser(ExtendUser) {} |
| 35 | /// The top-level binary operation that forms the reduction to a scalar |
| 36 | /// after the loop body. |
| 37 | Instruction *Reduction; |
| 38 | /// The extension of each of the inner binary operation's operands. |
| 39 | Instruction *ExtendA; |
| 40 | Instruction *ExtendB; |
| 41 | |
| 42 | /// The user of the extends that is then reduced. |
| 43 | Instruction *ExtendUser; |
| 44 | }; |
| 45 | |
| 46 | /// Helper class to create VPRecipies from IR instructions. |
| 47 | class VPRecipeBuilder { |
| 48 | /// The VPlan new recipes are added to. |
| 49 | VPlan &Plan; |
| 50 | |
| 51 | /// The loop that we evaluate. |
| 52 | Loop *OrigLoop; |
| 53 | |
| 54 | /// Target Library Info. |
| 55 | const TargetLibraryInfo *TLI; |
| 56 | |
| 57 | // Target Transform Info. |
| 58 | const TargetTransformInfo *TTI; |
| 59 | |
| 60 | /// The legality analysis. |
| 61 | LoopVectorizationLegality *Legal; |
| 62 | |
| 63 | /// The profitablity analysis. |
| 64 | LoopVectorizationCostModel &CM; |
| 65 | |
| 66 | PredicatedScalarEvolution &PSE; |
| 67 | |
| 68 | VPBuilder &Builder; |
| 69 | |
| 70 | /// The mask of each VPBB, generated earlier and used for predicating recipes |
| 71 | /// in VPBB. |
| 72 | /// TODO: remove by applying predication when generating the masks. |
| 73 | DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache; |
| 74 | |
| 75 | // VPlan construction support: Hold a mapping from ingredients to |
| 76 | // their recipe. |
| 77 | DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe; |
| 78 | |
| 79 | /// Cross-iteration reduction & first-order recurrence phis for which we need |
| 80 | /// to add the incoming value from the backedge after all recipes have been |
| 81 | /// created. |
| 82 | SmallVector<VPHeaderPHIRecipe *, 4> PhisToFix; |
| 83 | |
| 84 | /// A mapping of partial reduction exit instructions to their scaling factor. |
| 85 | DenseMap<const Instruction *, unsigned> ScaledReductionMap; |
| 86 | |
| 87 | /// Loop versioning instance for getting noalias metadata guaranteed by |
| 88 | /// runtime checks. |
| 89 | LoopVersioning *LVer; |
| 90 | |
| 91 | /// Check if \p I can be widened at the start of \p Range and possibly |
| 92 | /// decrease the range such that the returned value holds for the entire \p |
| 93 | /// Range. The function should not be called for memory instructions or calls. |
| 94 | bool shouldWiden(Instruction *I, VFRange &Range) const; |
| 95 | |
| 96 | /// Check if the load or store instruction \p I should widened for \p |
| 97 | /// Range.Start and potentially masked. Such instructions are handled by a |
| 98 | /// recipe that takes an additional VPInstruction for the mask. |
| 99 | VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I, |
| 100 | ArrayRef<VPValue *> Operands, |
| 101 | VFRange &Range); |
| 102 | |
| 103 | /// Check if an induction recipe should be constructed for \p Phi. If so build |
| 104 | /// and return it. If not, return null. |
| 105 | VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi, |
| 106 | ArrayRef<VPValue *> Operands, |
| 107 | VFRange &Range); |
| 108 | |
| 109 | /// Optimize the special case where the operand of \p I is a constant integer |
| 110 | /// induction variable. |
| 111 | VPWidenIntOrFpInductionRecipe * |
| 112 | tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands, |
| 113 | VFRange &Range); |
| 114 | |
| 115 | /// Handle call instructions. If \p CI can be widened for \p Range.Start, |
| 116 | /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be |
| 117 | /// decreased to ensure same decision from \p Range.Start to \p Range.End. |
| 118 | VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands, |
| 119 | VFRange &Range); |
| 120 | |
| 121 | /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe |
| 122 | /// if it can. The function should only be called if the cost-model indicates |
| 123 | /// that widening should be performed. |
| 124 | VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands); |
| 125 | |
| 126 | /// Makes Histogram count operations safe for vectorization, by emitting a |
| 127 | /// llvm.experimental.vector.histogram.add intrinsic in place of the |
| 128 | /// Load + Add|Sub + Store operations that perform the histogram in the |
| 129 | /// original scalar loop. |
| 130 | VPHistogramRecipe *tryToWidenHistogram(const HistogramInfo *HI, |
| 131 | ArrayRef<VPValue *> Operands); |
| 132 | |
| 133 | /// Examines reduction operations to see if the target can use a cheaper |
| 134 | /// operation with a wider per-iteration input VF and narrower PHI VF. |
| 135 | /// Each element within Chains is a pair with a struct containing reduction |
| 136 | /// information and the scaling factor between the number of elements in |
| 137 | /// the input and output. |
| 138 | /// Recursively calls itself to identify chained scaled reductions. |
| 139 | /// Returns true if this invocation added an entry to Chains, otherwise false. |
| 140 | /// i.e. returns false in the case that a subcall adds an entry to Chains, |
| 141 | /// but the top-level call does not. |
| 142 | bool getScaledReductions( |
| 143 | Instruction *PHI, Instruction *RdxExitInstr, VFRange &Range, |
| 144 | SmallVectorImpl<std::pair<PartialReductionChain, unsigned>> &Chains); |
| 145 | |
| 146 | public: |
| 147 | VPRecipeBuilder(VPlan &Plan, Loop *OrigLoop, const TargetLibraryInfo *TLI, |
| 148 | const TargetTransformInfo *TTI, |
| 149 | LoopVectorizationLegality *Legal, |
| 150 | LoopVectorizationCostModel &CM, |
| 151 | PredicatedScalarEvolution &PSE, VPBuilder &Builder, |
| 152 | DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache, |
| 153 | LoopVersioning *LVer) |
| 154 | : Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), |
| 155 | CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache), |
| 156 | LVer(LVer) {} |
| 157 | |
| 158 | std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) { |
| 159 | auto It = ScaledReductionMap.find(Val: ExitInst); |
| 160 | return It == ScaledReductionMap.end() ? std::nullopt |
| 161 | : std::make_optional(t&: It->second); |
| 162 | } |
| 163 | |
| 164 | /// Find all possible partial reductions in the loop and track all of those |
| 165 | /// that are valid so recipes can be formed later. |
| 166 | void collectScaledReductions(VFRange &Range); |
| 167 | |
| 168 | /// Create and return a widened recipe for \p R if one can be created within |
| 169 | /// the given VF \p Range. |
| 170 | VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range); |
| 171 | |
| 172 | /// Create and return a partial reduction recipe for a reduction instruction |
| 173 | /// along with binary operation and reduction phi operands. |
| 174 | VPRecipeBase *tryToCreatePartialReduction(Instruction *Reduction, |
| 175 | ArrayRef<VPValue *> Operands, |
| 176 | unsigned ScaleFactor); |
| 177 | |
| 178 | /// Set the recipe created for given ingredient. |
| 179 | void setRecipe(Instruction *I, VPRecipeBase *R) { |
| 180 | assert(!Ingredient2Recipe.contains(I) && |
| 181 | "Cannot reset recipe for instruction." ); |
| 182 | Ingredient2Recipe[I] = R; |
| 183 | } |
| 184 | |
| 185 | /// Returns the *entry* mask for block \p VPBB or null if the mask is |
| 186 | /// all-true. |
| 187 | VPValue *getBlockInMask(VPBasicBlock *VPBB) const { |
| 188 | return BlockMaskCache.lookup(Val: VPBB); |
| 189 | } |
| 190 | |
| 191 | /// Return the recipe created for given ingredient. |
| 192 | VPRecipeBase *getRecipe(Instruction *I) { |
| 193 | assert(Ingredient2Recipe.count(I) && |
| 194 | "Recording this ingredients recipe was not requested" ); |
| 195 | assert(Ingredient2Recipe[I] != nullptr && |
| 196 | "Ingredient doesn't have a recipe" ); |
| 197 | return Ingredient2Recipe[I]; |
| 198 | } |
| 199 | |
| 200 | /// Build a VPReplicationRecipe for \p I using \p Operands. If it is |
| 201 | /// predicated, add the mask as last operand. Range.End may be decreased to |
| 202 | /// ensure same recipe behavior from \p Range.Start to \p Range.End. |
| 203 | VPReplicateRecipe *handleReplication(Instruction *I, |
| 204 | ArrayRef<VPValue *> Operands, |
| 205 | VFRange &Range); |
| 206 | |
| 207 | VPValue *getVPValueOrAddLiveIn(Value *V) { |
| 208 | if (auto *I = dyn_cast<Instruction>(Val: V)) { |
| 209 | if (auto *R = Ingredient2Recipe.lookup(Val: I)) |
| 210 | return R->getVPSingleValue(); |
| 211 | } |
| 212 | return Plan.getOrAddLiveIn(V); |
| 213 | } |
| 214 | |
| 215 | void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) { |
| 216 | for (auto &[_, V] : BlockMaskCache) { |
| 217 | if (auto *New = Old2New.lookup(Val: V)) { |
| 218 | V->replaceAllUsesWith(New); |
| 219 | V = New; |
| 220 | } |
| 221 | } |
| 222 | } |
| 223 | }; |
| 224 | } // end namespace llvm |
| 225 | |
| 226 | #endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H |
| 227 | |