1//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanAnalysis.h"
10#include "VPlan.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanHelpers.h"
14#include "VPlanPatternMatch.h"
15#include "llvm/ADT/PostOrderIterator.h"
16#include "llvm/Analysis/TargetTransformInfo.h"
17
18using namespace llvm;
19using namespace VPlanPatternMatch;
20
21#define DEBUG_TYPE "vplan"
22
23void llvm::collectEphemeralRecipesForVPlan(
24 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
25 // First, collect seed recipes which are operands of assumes.
26 SmallVector<VPRecipeBase *> Worklist;
27 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
28 Range: vp_depth_first_deep(G: Plan.getVectorLoopRegion()->getEntry()))) {
29 for (VPRecipeBase &R : *VPBB) {
30 auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R);
31 if (!RepR || !match(V: RepR, P: m_Intrinsic<Intrinsic::assume>()))
32 continue;
33 Worklist.push_back(Elt: RepR);
34 EphRecipes.insert(V: RepR);
35 }
36 }
37
38 // Process operands of candidates in worklist and add them to the set of
39 // ephemeral recipes, if they don't have side-effects and are only used by
40 // other ephemeral recipes.
41 while (!Worklist.empty()) {
42 VPRecipeBase *Cur = Worklist.pop_back_val();
43 for (VPValue *Op : Cur->operands()) {
44 auto *OpR = Op->getDefiningRecipe();
45 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(V: OpR))
46 continue;
47 if (any_of(Range: Op->users(), P: [EphRecipes](VPUser *U) {
48 auto *UR = dyn_cast<VPRecipeBase>(Val: U);
49 return !UR || !EphRecipes.contains(V: UR);
50 }))
51 continue;
52 EphRecipes.insert(V: OpR);
53 Worklist.push_back(Elt: OpR);
54 }
55 }
56}
57
58template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
59 DominatorTreeBase<VPBlockBase, false> &DT);
60
61bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
62 const VPRecipeBase *B) const {
63 if (A == B)
64 return false;
65
66 auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
67 for (auto &R : *A->getParent()) {
68 if (&R == A)
69 return true;
70 if (&R == B)
71 return false;
72 }
73 llvm_unreachable("recipe not found");
74 };
75 const VPBlockBase *ParentA = A->getParent();
76 const VPBlockBase *ParentB = B->getParent();
77 if (ParentA == ParentB)
78 return LocalComesBefore(A, B);
79
80 return Base::properlyDominates(A: ParentA, B: ParentB);
81}
82
83InstructionCost
84VPRegisterUsage::spillCost(const TargetTransformInfo &TTI,
85 TargetTransformInfo::TargetCostKind CostKind,
86 unsigned OverrideMaxNumRegs) const {
87 InstructionCost Cost;
88 for (const auto &[RegClass, MaxUsers] : MaxLocalUsers) {
89 unsigned AvailableRegs = OverrideMaxNumRegs > 0
90 ? OverrideMaxNumRegs
91 : TTI.getNumberOfRegisters(ClassID: RegClass);
92 if (MaxUsers > AvailableRegs) {
93 // Assume that for each register used past what's available we get one
94 // spill and reload.
95 unsigned Spills = MaxUsers - AvailableRegs;
96 InstructionCost SpillCost =
97 TTI.getRegisterClassSpillCost(ClassID: RegClass, CostKind) +
98 TTI.getRegisterClassReloadCost(ClassID: RegClass, CostKind);
99 InstructionCost TotalCost = Spills * SpillCost;
100 LLVM_DEBUG(dbgs() << "LV(REG): Cost of " << TotalCost << " from "
101 << Spills << " spills of "
102 << TTI.getRegisterClassName(RegClass) << "\n");
103 Cost += TotalCost;
104 }
105 }
106 return Cost;
107}
108
109SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
110 VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
111 const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
112 // Each 'key' in the map opens a new interval. The values
113 // of the map are the index of the 'last seen' usage of the
114 // VPValue that is the key.
115 using IntervalMap = SmallDenseMap<VPValue *, unsigned, 16>;
116
117 // Maps indices to recipes.
118 SmallVector<VPRecipeBase *, 64> Idx2Recipe;
119 // Marks the end of each interval.
120 IntervalMap EndPoint;
121 // Saves the list of VPValues that are used in the loop.
122 SmallPtrSet<VPValue *, 8> Ends;
123 // Saves the list of values that are used in the loop but are defined outside
124 // the loop (not including non-recipe values such as arguments and
125 // constants).
126 SmallSetVector<VPValue *, 8> LoopInvariants;
127 if (!Plan.getVectorTripCount().user_empty())
128 LoopInvariants.insert(X: &Plan.getVectorTripCount());
129
130 // We scan the loop in a topological order in order and assign a number to
131 // each recipe. We use RPO to ensure that defs are met before their users. We
132 // assume that each recipe that has in-loop users starts an interval. We
133 // record every time that an in-loop value is used, so we have a list of the
134 // first occurences of each recipe and last occurrence of each VPValue.
135 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
136 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
137 LoopRegion);
138 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Range&: RPOT)) {
139 if (!VPBB->getParent())
140 break;
141 for (VPRecipeBase &R : *VPBB) {
142 Idx2Recipe.push_back(Elt: &R);
143
144 // Save the end location of each USE.
145 for (VPValue *U : R.operands()) {
146 if (isa<VPRecipeValue>(Val: U)) {
147 // Overwrite previous end points.
148 EndPoint[U] = Idx2Recipe.size();
149 Ends.insert(Ptr: U);
150 } else if (auto *IRV = dyn_cast<VPIRValue>(Val: U)) {
151 // Ignore non-recipe values such as arguments, constants, etc.
152 // FIXME: Might need some motivation why these values are ignored. If
153 // for example an argument is used inside the loop it will increase
154 // the register pressure (so shouldn't we add it to LoopInvariants).
155 if (!isa<Instruction>(Val: IRV->getValue()))
156 continue;
157 // This recipe is outside the loop, record it and continue.
158 LoopInvariants.insert(X: U);
159 }
160 // Other types of VPValue are currently not tracked.
161 }
162 }
163 if (VPBB == LoopRegion->getExiting()) {
164 // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
165 // exiting block, where their increment will get materialized eventually.
166 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
167 if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &R)) {
168 EndPoint[WideIV] = Idx2Recipe.size();
169 Ends.insert(Ptr: WideIV);
170 }
171 }
172 }
173 }
174
175 // Saves the list of intervals that end with the index in 'key'.
176 using VPValueList = SmallVector<VPValue *, 2>;
177 SmallDenseMap<unsigned, VPValueList, 16> TransposeEnds;
178
179 // Next, we transpose the EndPoints into a multi map that holds the list of
180 // intervals that *end* at a specific location.
181 for (auto &Interval : EndPoint)
182 TransposeEnds[Interval.second].push_back(Elt: Interval.first);
183
184 SmallPtrSet<VPValue *, 8> OpenIntervals;
185 SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
186 SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
187
188 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
189
190 const auto &TTICapture = TTI;
191 auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
192 if (Ty->isTokenTy() || !VectorType::isValidElementType(ElemTy: Ty) ||
193 (VF.isScalable() &&
194 !TTICapture.isElementTypeLegalForScalableVector(Ty)))
195 return 0;
196 return TTICapture.getRegUsageForType(Ty: VectorType::get(ElementType: Ty, EC: VF));
197 };
198
199 VPValue *CanIV = LoopRegion->getCanonicalIV();
200 // Note: canonical IVs are retained even if they have no users.
201 if (!CanIV->user_empty())
202 OpenIntervals.insert(Ptr: CanIV);
203
204 // We scan the instructions linearly and record each time that a new interval
205 // starts, by placing it in a set. If we find this value in TransposEnds then
206 // we remove it from the set. The max register usage is the maximum register
207 // usage of the recipes of the set.
208 for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
209 VPRecipeBase *R = Idx2Recipe[Idx];
210
211 // Remove all of the VPValues that end at this location.
212 VPValueList &List = TransposeEnds[Idx];
213 for (VPValue *ToRemove : List)
214 OpenIntervals.erase(Ptr: ToRemove);
215
216 // Ignore recipes that are never used within the loop and do not have side
217 // effects.
218 if (none_of(Range: R->definedValues(),
219 P: [&Ends](VPValue *Def) { return Ends.count(Ptr: Def); }) &&
220 !R->mayHaveSideEffects())
221 continue;
222
223 // Skip recipes for ignored values.
224 // TODO: Should mark recipes for ephemeral values that cannot be removed
225 // explictly in VPlan.
226 if (isa<VPSingleDefRecipe>(Val: R) &&
227 ValuesToIgnore.contains(
228 Ptr: cast<VPSingleDefRecipe>(Val: R)->getUnderlyingValue()))
229 continue;
230
231 // For each VF find the maximum usage of registers.
232 for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
233 // Count the number of registers used, per register class, given all open
234 // intervals.
235 // Note that elements in this SmallMapVector will be default constructed
236 // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
237 // there is no previous entry for ClassID.
238 SmallMapVector<unsigned, unsigned, 4> RegUsage;
239
240 for (auto *VPV : OpenIntervals) {
241 // Skip artificial values or values that weren't present in the original
242 // loop.
243 // TODO: Remove skipping values that weren't present in the original
244 // loop after removing the legacy
245 // LoopVectorizationCostModel::calculateRegisterUsage
246 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
247 VPBranchOnMaskRecipe>(Val: VPV) ||
248 match(V: VPV, P: m_ExtractLastPart(Op0: m_VPValue())))
249 continue;
250
251 if (VFs[J].isScalar() ||
252 isa<VPRegionValue, VPReplicateRecipe, VPDerivedIVRecipe,
253 VPCurrentIterationPHIRecipe, VPScalarIVStepsRecipe>(Val: VPV) ||
254 (isa<VPInstruction>(Val: VPV) && vputils::onlyScalarValuesUsed(Def: VPV)) ||
255 (isa<VPReductionPHIRecipe>(Val: VPV) &&
256 (cast<VPReductionPHIRecipe>(Val: VPV))->isInLoop())) {
257 unsigned ClassID =
258 TTI.getRegisterClassForType(Vector: false, Ty: VPV->getScalarType());
259 // FIXME: The target might use more than one register for the type
260 // even in the scalar case.
261 RegUsage[ClassID] += 1;
262 } else {
263 // The output from scaled phis and scaled reductions actually has
264 // fewer lanes than the VF.
265 unsigned ScaleFactor =
266 vputils::getVFScaleFactor(R: VPV->getDefiningRecipe());
267 ElementCount VF = VFs[J];
268 if (ScaleFactor > 1) {
269 VF = VFs[J].divideCoefficientBy(RHS: ScaleFactor);
270 LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
271 << " to " << VF << " for " << *R << "\n";);
272 }
273
274 Type *ScalarTy = VPV->getScalarType();
275 unsigned ClassID = TTI.getRegisterClassForType(Vector: true, Ty: ScalarTy);
276 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
277 }
278 }
279
280 for (const auto &Pair : RegUsage) {
281 auto &Entry = MaxUsages[J][Pair.first];
282 Entry = std::max(a: Entry, b: Pair.second);
283 }
284 }
285
286 LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
287 << OpenIntervals.size() << '\n');
288
289 // Add used VPValues defined by the current recipe to the list of open
290 // intervals.
291 for (VPValue *DefV : R->definedValues())
292 if (Ends.contains(Ptr: DefV))
293 OpenIntervals.insert(Ptr: DefV);
294 }
295
296 // We also search for instructions that are defined outside the loop, but are
297 // used inside the loop. We need this number separately from the max-interval
298 // usage number because when we unroll, loop-invariant values do not take
299 // more register.
300 VPRegisterUsage RU;
301 for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
302 // Note that elements in this SmallMapVector will be default constructed
303 // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
304 // there is no previous entry for ClassID.
305 SmallMapVector<unsigned, unsigned, 4> Invariant;
306
307 for (auto *In : LoopInvariants) {
308 // FIXME: The target might use more than one register for the type
309 // even in the scalar case.
310 bool IsScalar = vputils::onlyScalarValuesUsed(Def: In);
311
312 ElementCount VF = IsScalar ? ElementCount::getFixed(MinVal: 1) : VFs[Idx];
313 unsigned ClassID =
314 TTI.getRegisterClassForType(Vector: VF.isVector(), Ty: In->getScalarType());
315 Invariant[ClassID] += GetRegUsage(In->getScalarType(), VF);
316 }
317
318 LLVM_DEBUG({
319 dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
320 dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
321 << " item\n";
322 for (const auto &pair : MaxUsages[Idx]) {
323 dbgs() << "LV(REG): RegisterClass: "
324 << TTI.getRegisterClassName(pair.first) << ", " << pair.second
325 << " registers\n";
326 }
327 dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
328 << " item\n";
329 for (const auto &pair : Invariant) {
330 dbgs() << "LV(REG): RegisterClass: "
331 << TTI.getRegisterClassName(pair.first) << ", " << pair.second
332 << " registers\n";
333 }
334 });
335
336 RU.LoopInvariantRegs = Invariant;
337 RU.MaxLocalUsers = MaxUsages[Idx];
338 RUs[Idx] = RU;
339 }
340
341 return RUs;
342}
343