//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of different VPlan-related auxiliary
/// helpers.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H

#include "VPlanAnalysis.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/InstructionCost.h"

namespace llvm {

class AssumptionCache;
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class Value;

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
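/// For example (an illustrative sketch; assumes a suitably positioned
/// IRBuilder \p B):
/// \code
///   // Fixed VF of 4 yields the constant 4.
///   Value *C = getRuntimeVF(B, B.getInt64Ty(), ElementCount::getFixed(4));
///   // Scalable VF of <vscale x 4> yields an expression computing vscale * 4.
///   Value *R = getRuntimeVF(B, B.getInt64Ty(), ElementCount::getScalable(4));
/// \endcode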
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
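/// For example (illustrative), with a fixed VF of 4 and a Step of 8 this is
/// the constant 32; with VF = <vscale x 4> it is an expression computing
/// vscale * 4 * 8 at runtime.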
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
/// For pointer induction, returns StartValue[Index * StepValue].
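/// For example (illustrative), an integer induction with StartValue 10 and
/// StepValue 3 maps Index 4 to 10 + 4 * 3 = 22.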
Value *emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
                            Value *Step,
                            InductionDescriptor::InductionKind InductionKind,
                            const BinaryOperator *InductionBinOp);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.:
/// [1, 16) = {1, 2, 4, 8}
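/// Iterating over a VFRange visits each factor once, e.g. (an illustrative
/// sketch):
/// \code
///   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
///   for (ElementCount VF : Range)
///     ...; // Visits 1, 2, 4 and 8.
/// \endcode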
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind = Kind::First;

public:
  VPLane(unsigned Lane) : Lane(Lane) {}
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

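  /// Returns the lane \p Offset positions before the end of a vector with
  /// \p VF lanes. For example (an illustrative note), with VF = <vscale x 4>,
  /// getLaneFromEnd(VF, 1) is lane 3 of the last 4-element subvector, i.e.
  /// Kind::ScalableLast with lane offset 3; with a fixed VF of 4 it is simply
  /// lane 3 with Kind::First.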
  static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
    assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
           "trying to extract with invalid offset");
    unsigned LaneOffset = VF.getKnownMinValue() - Offset;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    return getLaneFromEnd(VF, 1);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First &&
           "can only get known lane from the beginning");
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
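  /// For Kind::First the cache index equals the lane; for Kind::ScalableLast
  /// it is biased past the first VF.getKnownMinValue() slots. For example
  /// (illustrative), with VF = <vscale x 4>, lane 1 of the last subvector maps
  /// to cache index 4 + 1 = 5.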
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue() &&
             "ScalableLast can only be used with scalable VFs");
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue() &&
             "Cannot extract lane larger than VF");
      return Lane;
    }
  }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(const TargetTransformInfo *TTI, ElementCount VF,
                   LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                   IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop,
                   Type *CanonicalIVTy);
  /// Target Transform Info.
  const TargetTransformInfo *TTI;

  /// The chosen Vectorization Factor of the loop being vectorized.
  ElementCount VF;

  /// Hold the index to generate specific scalar instructions. If unset, all
  /// instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPLane> Lane;

  struct DataState {
    // Each value from the original loop, when vectorized, is represented by a
    // vector value in the map.
    DenseMap<const VPValue *, Value *> VPV2Vector;

    DenseMap<const VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
  } Data;

  /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
  /// is false, otherwise return the generated scalar. \see set.
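  /// A minimal sketch of the set/get contract (illustrative values only):
  /// \code
  ///   State.set(Def, WideV);                // cache the vectorized value
  ///   Value *V = State.get(Def);            // returns WideV
  ///   State.set(Def, ScalarV, VPLane(0));   // cache the lane-0 scalar
  ///   Value *S = State.get(Def, VPLane(0)); // returns ScalarV
  /// \endcode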
  Value *get(const VPValue *Def, bool IsScalar = false);

  /// Get the generated Value for a given VPValue \p Def at the given \p Lane.
  Value *get(const VPValue *Def, const VPLane &Lane);

  bool hasVectorValue(const VPValue *Def) {
    return Data.VPV2Vector.contains(Def);
  }

  bool hasScalarValue(const VPValue *Def, VPLane Lane) {
    auto I = Data.VPV2Scalars.find(Def);
    if (I == Data.VPV2Scalars.end())
      return false;
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    return CacheIdx < I->second.size() && I->second[CacheIdx];
  }

  /// Set the generated vector Value for a given VPValue, if \p
  /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
  void set(const VPValue *Def, Value *V, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPLane(0));
      return;
    }
    assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
           "scalar values must be stored as (0, 0)");
    Data.VPV2Vector[Def] = V;
  }

  /// Reset an existing vector value for \p Def.
  void reset(const VPValue *Def, Value *V) {
    assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
    Data.VPV2Vector[Def] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Lane.
  void set(const VPValue *Def, Value *V, const VPLane &Lane) {
    auto &Scalars = Data.VPV2Scalars[Def];
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Lane.
  void reset(const VPValue *Def, Value *V, const VPLane &Lane) {
    auto Iter = Data.VPV2Scalars.find(Def);
    assert(Iter != Data.VPV2Scalars.end() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second.size() &&
           "need to overwrite existing value");
    Iter->second[CacheIdx] = V;
  }

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Insert the scalar value of \p Def at \p Lane into \p Lane of \p WideValue
  /// and return the resulting value.
  Value *packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue,
                                       const VPLane &Lane);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<const VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Updater for the DominatorTree.
    DomTreeUpdater DTU;

    CFGState(DominatorTree *DT)
        : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a pointer to AssumptionCache to register new assumptions after
  /// replicating assume calls.
  AssumptionCache *AC;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The parent loop object for the current scope, or nullptr.
  Loop *CurrentParentLoop = nullptr;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;

  /// VPlan-based dominator tree.
  VPDominatorTree VPDT;
};

/// Struct to hold the various analyses needed for cost computations.
struct VPCostContext {
  const TargetTransformInfo &TTI;
  const TargetLibraryInfo &TLI;
  VPTypeAnalysis Types;
  LLVMContext &LLVMCtx;
  LoopVectorizationCostModel &CM;
  SmallPtrSet<Instruction *, 8> SkipCostComputation;
  TargetTransformInfo::TargetCostKind CostKind;
  PredicatedScalarEvolution &PSE;
  const Loop *L;

  VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                const VPlan &Plan, LoopVectorizationCostModel &CM,
                TargetTransformInfo::TargetCostKind CostKind,
                PredicatedScalarEvolution &PSE, const Loop *L)
      : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
        CostKind(CostKind), PSE(PSE), L(L) {}

  /// Return the cost for \p UI with \p VF using the legacy cost model as
  /// fallback until computing the cost of all recipes migrates to VPlan.
  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;

  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
  /// has already been pre-computed.
  bool skipCostComputation(Instruction *UI, bool IsVector) const;

  /// \returns the divisor applied to the cost of a predicated block.
  /// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor.
  unsigned getPredBlockCostDivisor(BasicBlock *BB) const;

  /// Returns the OperandInfo for \p V, if it is a live-in.
  TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;

  /// Return true if \p I is considered uniform-after-vectorization in the
  /// legacy cost model for \p VF. Only used to check for additional VPlan
  /// simplifications.
  bool isLegacyUniformAfterVectorization(Instruction *I, ElementCount VF) const;

  /// Estimate the overhead of scalarizing a recipe with result type \p ResultTy
  /// and \p Operands with \p VF. This is a convenience wrapper for the
  /// type-based getScalarizationOverhead API. \p VIC provides context about
  /// whether the scalarization is for a load/store operation. If \p
  /// AlwaysIncludeReplicatingR is true, always compute the cost of scalarizing
  /// replicating operands.
  InstructionCost getScalarizationOverhead(
      Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
      TTI::VectorInstrContext VIC = TTI::VectorInstrContext::None,
      bool AlwaysIncludeReplicatingR = false);
};

/// This class can be used to assign names to VPValues. For VPValues without
/// underlying value, assign consecutive numbers and use those as names (wrapped
/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
/// ir<>), appending a .V version number if there are multiple uses of the same
/// name. Allows querying names for VPValues for printing, similar to the
/// ModuleSlotTracker for IR values.
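/// Typical use (an illustrative sketch; 'Plan' and 'SomeVPValue' are assumed
/// to exist in the caller):
/// \code
///   VPSlotTracker SlotTracker(&Plan);
///   dbgs() << SlotTracker.getOrCreateName(SomeVPValue) << "\n";
/// \endcode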
class VPSlotTracker {
  /// Keep track of versioned names assigned to VPValues with underlying IR
  /// values.
  DenseMap<const VPValue *, std::string> VPValue2Name;
  /// Keep track of the next number to use to version the base name.
  StringMap<unsigned> BaseName2Version;

  /// Number to assign to the next VPValue without underlying value.
  unsigned NextSlot = 0;

  /// Lazily created ModuleSlotTracker, used only when unnamed IR instructions
  /// require slot tracking.
  std::unique_ptr<ModuleSlotTracker> MST;

  /// Cached metadata kind names from the Module's LLVMContext.
  SmallVector<StringRef> MDNames;

  /// Cached Module pointer for printing metadata.
  const Module *M = nullptr;

  void assignName(const VPValue *V);
  LLVM_ABI_FOR_TEST void assignNames(const VPlan &Plan);
  void assignNames(const VPBasicBlock *VPBB);
  std::string getName(const Value *V);

public:
  VPSlotTracker(const VPlan *Plan = nullptr) {
    if (Plan) {
      assignNames(*Plan);
      if (auto *ScalarHeader = Plan->getScalarHeader())
        M = ScalarHeader->getIRBasicBlock()->getModule();
    }
  }

  /// Returns the name assigned to \p V, if there is one; otherwise try to
  /// construct a name from the underlying value, if there is one, and return
  /// <badref> if neither exists.
  std::string getOrCreateName(const VPValue *V) const;

  /// Returns the cached metadata kind names.
  ArrayRef<StringRef> getMDNames() {
    if (MDNames.empty() && M)
      M->getContext().getMDKindNames(MDNames);
    return MDNames;
  }

  /// Returns the cached Module pointer.
  const Module *getModule() const { return M; }
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
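/// Typical use (an illustrative sketch, assuming a VPlan 'Plan'):
/// \code
///   VPlanPrinter Printer(dbgs(), Plan);
///   Printer.dump();
/// \endcode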
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};
#endif

/// Check if a constant \p C can be safely treated as having been extended
/// from a narrower type with the given extension kind.
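/// For example (an illustrative note), a 32-bit constant of 200 fits in an i8
/// when zero-extended but not when sign-extended, so the answer depends on
/// \p ExtKind.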
bool canConstantBeExtended(const APInt *C, Type *NarrowType,
                           TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H