//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
  if (auto *LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

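// A blend merges per-lane values from its incoming operands, so all incoming
// values must share a single scalar type; infer it from the first incoming
// value and cache it for the rest.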
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
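  // The explicit vector length is modeled as a 32-bit integer, matching the
  // i32 EVL operand of LLVM's vector-predication (VP) intrinsics.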
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of the first operand only, as other operands of header
    // phis may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on the first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
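    // The callee is the last operand, unless the recipe is predicated, in
    // which case the mask is last and the callee is second-to-last.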
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling due to it possibly being
                // truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}

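// Collect all recipes in \p Plan that only compute values feeding
// llvm.assume calls. Starting from the assumes themselves, operands are
// walked transitively, adding any side-effect-free recipe whose users are
// all ephemeral.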
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

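  // Returns true if A appears before B in their (shared) parent basic block,
  // determined by a linear scan of the block's recipes.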
  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
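/// For example, a partial reduction that accumulates 4 input lanes into 1 has
/// a scale factor of 4, so its output only has VF/4 lanes.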
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
  return any_of(MaxLocalUsers, [&TTI](auto &LU) {
    return LU.second > TTI.getNumberOfRegisters(LU.first);
  });
}

SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the set of recipes that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
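  // For example, a recipe first defined at index 3 and last used at index 7
  // occupies a register for the whole interval between those two points.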
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());

  const auto &TTICapture = TTI;
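  // Returns the number of registers needed to hold a vector of Ty with VF
  // lanes; returns 0 for types that cannot legally be placed in a vector
  // register.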
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

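        // A recipe occupies a single scalar register when the VF itself is
        // scalar, when it produces scalar values (e.g. the canonical IV,
        // replicated recipes, scalar IV steps), when all its users only
        // consume its scalar values, or when it is an in-loop reduction phi;
        // otherwise it occupies vector registers.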
        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because, when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
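      // An invariant only needs a scalar register if every user consumes its
      // scalar value; otherwise it is kept (broadcast) in a vector register.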
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
646 | |