1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
20#include "LoopVectorizationPlanner.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/DomTreeUpdater.h"
33#include "llvm/Analysis/LoopInfo.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/Debug.h"
44#include "llvm/Support/GraphWriter.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/Utils/BasicBlockUtils.h"
47#include "llvm/Transforms/Utils/LoopVersioning.h"
48#include <cassert>
49#include <string>
50
51using namespace llvm;
52using namespace llvm::VPlanPatternMatch;
53
54namespace llvm {
55extern cl::opt<bool> EnableVPlanNativePath;
56}
57
58extern cl::opt<unsigned> ForceTargetInstructionCost;
59
60static cl::opt<bool> PrintVPlansInDotFormat(
61 "vplan-print-in-dot-format", cl::Hidden,
62 cl::desc("Use dot format instead of plain text when dumping VPlans"));
63
64#define DEBUG_TYPE "loop-vectorize"
65
66#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
67raw_ostream &llvm::operator<<(raw_ostream &OS, const VPRecipeBase &R) {
68 const VPBasicBlock *Parent = R.getParent();
69 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
70 R.print(OS, "", SlotTracker);
71 return OS;
72}
73#endif
74
75Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
76 const ElementCount &VF) const {
77 switch (LaneKind) {
78 case VPLane::Kind::ScalableLast:
79 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
80 return Builder.CreateSub(LHS: getRuntimeVF(B&: Builder, Ty: Builder.getInt32Ty(), VF),
81 RHS: Builder.getInt32(C: VF.getKnownMinValue() - Lane));
82 case VPLane::Kind::First:
83 return Builder.getInt32(C: Lane);
84 }
85 llvm_unreachable("Unknown lane kind");
86}
87
88VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
89 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
90 if (Def)
91 Def->addDefinedValue(V: this);
92}
93
94VPValue::~VPValue() {
95 assert(Users.empty() && "trying to delete a VPValue with remaining users");
96 if (Def)
97 Def->removeDefinedValue(V: this);
98}
99
100#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
101void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
102 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
103 R->print(OS, "", SlotTracker);
104 else
105 printAsOperand(OS, SlotTracker);
106}
107
108void VPValue::dump() const {
109 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
110 VPSlotTracker SlotTracker(
111 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
112 print(dbgs(), SlotTracker);
113 dbgs() << "\n";
114}
115
116void VPDef::dump() const {
117 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
118 VPSlotTracker SlotTracker(
119 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
120 print(dbgs(), "", SlotTracker);
121 dbgs() << "\n";
122}
123#endif
124
125VPRecipeBase *VPValue::getDefiningRecipe() {
126 return cast_or_null<VPRecipeBase>(Val: Def);
127}
128
129const VPRecipeBase *VPValue::getDefiningRecipe() const {
130 return cast_or_null<VPRecipeBase>(Val: Def);
131}
132
133// Get the top-most entry block of \p Start. This is the entry block of the
134// containing VPlan. This function is templated to support both const and non-const blocks
135template <typename T> static T *getPlanEntry(T *Start) {
136 T *Next = Start;
137 T *Current = Start;
138 while ((Next = Next->getParent()))
139 Current = Next;
140
141 SmallSetVector<T *, 8> WorkList;
142 WorkList.insert(Current);
143
144 for (unsigned i = 0; i < WorkList.size(); i++) {
145 T *Current = WorkList[i];
146 if (Current->getNumPredecessors() == 0)
147 return Current;
148 auto &Predecessors = Current->getPredecessors();
149 WorkList.insert_range(Predecessors);
150 }
151
152 llvm_unreachable("VPlan without any entry node without predecessors");
153}
154
155VPlan *VPBlockBase::getPlan() { return getPlanEntry(Start: this)->Plan; }
156
157const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(Start: this)->Plan; }
158
159/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
160const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
161 const VPBlockBase *Block = this;
162 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block))
163 Block = Region->getEntry();
164 return cast<VPBasicBlock>(Val: Block);
165}
166
167VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
168 VPBlockBase *Block = this;
169 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block))
170 Block = Region->getEntry();
171 return cast<VPBasicBlock>(Val: Block);
172}
173
174void VPBlockBase::setPlan(VPlan *ParentPlan) {
175 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
176 Plan = ParentPlan;
177}
178
179/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
180const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
181 const VPBlockBase *Block = this;
182 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block))
183 Block = Region->getExiting();
184 return cast<VPBasicBlock>(Val: Block);
185}
186
187VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
188 VPBlockBase *Block = this;
189 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block))
190 Block = Region->getExiting();
191 return cast<VPBasicBlock>(Val: Block);
192}
193
194VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
195 if (!Successors.empty() || !Parent)
196 return this;
197 assert(Parent->getExiting() == this &&
198 "Block w/o successors not the exiting block of its parent.");
199 return Parent->getEnclosingBlockWithSuccessors();
200}
201
202VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
203 if (!Predecessors.empty() || !Parent)
204 return this;
205 assert(Parent->getEntry() == this &&
206 "Block w/o predecessors not the entry of its parent.");
207 return Parent->getEnclosingBlockWithPredecessors();
208}
209
210bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
211 const VPDominatorTree &VPDT) {
212 auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB);
213 if (!VPBB)
214 return false;
215
216 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
217 // VPBB as its entry, i.e., free of predecessors.
218 if (auto *R = VPBB->getParent())
219 return !R->isReplicator() && VPBB->getNumPredecessors() == 0;
220
221 // A header dominates its second predecessor (the latch), with the other
222 // predecessor being the preheader
223 return VPB->getPredecessors().size() == 2 &&
224 VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]);
225}
226
227bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
228 const VPDominatorTree &VPDT) {
229 // A latch has a header as its second successor, with its other successor
230 // leaving the loop. A preheader OTOH has a header as its first (and only)
231 // successor.
232 return VPB->getNumSuccessors() == 2 &&
233 VPBlockUtils::isHeader(VPB: VPB->getSuccessors()[1], VPDT);
234}
235
236VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
237 iterator It = begin();
238 while (It != end() && It->isPhi())
239 It++;
240 return It;
241}
242
243VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
244 ElementCount VF, LoopInfo *LI,
245 DominatorTree *DT, AssumptionCache *AC,
246 IRBuilderBase &Builder, VPlan *Plan,
247 Loop *CurrentParentLoop, Type *CanonicalIVTy)
248 : TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
249 CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
250 VPDT(*Plan) {}
251
252Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
253 if (Def->isLiveIn())
254 return Def->getLiveInIRValue();
255
256 if (hasScalarValue(Def, Lane))
257 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
258
259 if (!Lane.isFirstLane() && vputils::isSingleScalar(VPV: Def) &&
260 hasScalarValue(Def, Lane: VPLane::getFirstLane())) {
261 return Data.VPV2Scalars[Def][0];
262 }
263
264 // Look through BuildVector to avoid redundant extracts.
265 // TODO: Remove once replicate regions are unrolled explicitly.
266 if (Lane.getKind() == VPLane::Kind::First && match(V: Def, P: m_BuildVector())) {
267 auto *BuildVector = cast<VPInstruction>(Val: Def);
268 return get(Def: BuildVector->getOperand(N: Lane.getKnownLane()), IsScalar: true);
269 }
270
271 assert(hasVectorValue(Def));
272 auto *VecPart = Data.VPV2Vector[Def];
273 if (!VecPart->getType()->isVectorTy()) {
274 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
275 return VecPart;
276 }
277 // TODO: Cache created scalar values.
278 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
279 auto *Extract = Builder.CreateExtractElement(Vec: VecPart, Idx: LaneV);
280 // set(Def, Extract, Instance);
281 return Extract;
282}
283
284Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
285 if (NeedsScalar) {
286 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
287 !vputils::onlyFirstLaneUsed(Def) ||
288 (hasScalarValue(Def, VPLane(0)) &&
289 Data.VPV2Scalars[Def].size() == 1)) &&
290 "Trying to access a single scalar per part but has multiple scalars "
291 "per part.");
292 return get(Def, Lane: VPLane(0));
293 }
294
295 // If Values have been set for this Def return the one relevant for \p Part.
296 if (hasVectorValue(Def))
297 return Data.VPV2Vector[Def];
298
299 auto GetBroadcastInstrs = [this, Def](Value *V) {
300 bool SafeToHoist =
301 !Def->hasDefiningRecipe() ||
302 VPDT.properlyDominates(A: Def->getDefiningRecipe()->getParent(),
303 B: Plan->getVectorPreheader());
304
305 if (VF.isScalar())
306 return V;
307 // Place the code for broadcasting invariant variables in the new preheader.
308 IRBuilder<>::InsertPointGuard Guard(Builder);
309 if (SafeToHoist) {
310 BasicBlock *LoopVectorPreHeader =
311 CFG.VPBB2IRBB[Plan->getVectorPreheader()];
312 if (LoopVectorPreHeader)
313 Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
314 }
315
316 // Place the code for broadcasting invariant variables in the new preheader.
317 // Broadcast the scalar into all locations in the vector.
318 Value *Shuf = Builder.CreateVectorSplat(EC: VF, V, Name: "broadcast");
319
320 return Shuf;
321 };
322
323 if (!hasScalarValue(Def, Lane: {0})) {
324 assert(Def->isLiveIn() && "expected a live-in");
325 Value *IRV = Def->getLiveInIRValue();
326 Value *B = GetBroadcastInstrs(IRV);
327 set(Def, V: B);
328 return B;
329 }
330
331 Value *ScalarValue = get(Def, Lane: VPLane(0));
332 // If we aren't vectorizing, we can just copy the scalar map values over
333 // to the vector map.
334 if (VF.isScalar()) {
335 set(Def, V: ScalarValue);
336 return ScalarValue;
337 }
338
339 bool IsSingleScalar = vputils::isSingleScalar(VPV: Def);
340
341 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
342 // Check if there is a scalar value for the selected lane.
343 if (!hasScalarValue(Def, Lane: LastLane)) {
344 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
345 // VPExpandSCEVRecipes can also be a single scalar.
346 assert((isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe,
347 VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
348 "unexpected recipe found to be invariant");
349 IsSingleScalar = true;
350 LastLane = 0;
351 }
352
353 auto *LastInst = cast<Instruction>(Val: get(Def, Lane: LastLane));
354 // Set the insert point after the last scalarized instruction or after the
355 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
356 // will directly follow the scalar definitions.
357 auto OldIP = Builder.saveIP();
358 auto NewIP = isa<PHINode>(Val: LastInst)
359 ? LastInst->getParent()->getFirstNonPHIIt()
360 : std::next(x: BasicBlock::iterator(LastInst));
361 Builder.SetInsertPoint(&*NewIP);
362
363 // However, if we are vectorizing, we need to construct the vector values.
364 // If the value is known to be uniform after vectorization, we can just
365 // broadcast the scalar value corresponding to lane zero. Otherwise, we
366 // construct the vector values using insertelement instructions. Since the
367 // resulting vectors are stored in State, we will only generate the
368 // insertelements once.
369 Value *VectorValue = nullptr;
370 if (IsSingleScalar) {
371 VectorValue = GetBroadcastInstrs(ScalarValue);
372 set(Def, V: VectorValue);
373 } else {
374 assert(!VF.isScalable() && "VF is assumed to be non scalable.");
375 // Initialize packing with insertelements to start from poison.
376 VectorValue = PoisonValue::get(T: toVectorizedTy(Ty: LastInst->getType(), EC: VF));
377 for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
378 VectorValue = packScalarIntoVectorizedValue(Def, WideValue: VectorValue, Lane);
379 set(Def, V: VectorValue);
380 }
381 Builder.restoreIP(IP: OldIP);
382 return VectorValue;
383}
384
385void VPTransformState::setDebugLocFrom(DebugLoc DL) {
386 const DILocation *DIL = DL;
387 // When a FSDiscriminator is enabled, we don't need to add the multiply
388 // factors to the discriminators.
389 if (DIL &&
390 Builder.GetInsertBlock()
391 ->getParent()
392 ->shouldEmitDebugInfoForProfiling() &&
393 !EnableFSDiscriminator) {
394 // FIXME: For scalable vectors, assume vscale=1.
395 unsigned UF = Plan->getUF();
396 auto NewDIL =
397 DIL->cloneByMultiplyingDuplicationFactor(DF: UF * VF.getKnownMinValue());
398 if (NewDIL)
399 Builder.SetCurrentDebugLocation(*NewDIL);
400 else
401 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
402 << DIL->getFilename() << " Line: " << DIL->getLine());
403 } else
404 Builder.SetCurrentDebugLocation(DL);
405}
406
407Value *VPTransformState::packScalarIntoVectorizedValue(const VPValue *Def,
408 Value *WideValue,
409 const VPLane &Lane) {
410 Value *ScalarInst = get(Def, Lane);
411 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
412 if (auto *StructTy = dyn_cast<StructType>(Val: WideValue->getType())) {
413 // We must handle each element of a vectorized struct type.
414 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
415 Value *ScalarValue = Builder.CreateExtractValue(Agg: ScalarInst, Idxs: I);
416 Value *VectorValue = Builder.CreateExtractValue(Agg: WideValue, Idxs: I);
417 VectorValue =
418 Builder.CreateInsertElement(Vec: VectorValue, NewElt: ScalarValue, Idx: LaneExpr);
419 WideValue = Builder.CreateInsertValue(Agg: WideValue, Val: VectorValue, Idxs: I);
420 }
421 } else {
422 WideValue = Builder.CreateInsertElement(Vec: WideValue, NewElt: ScalarInst, Idx: LaneExpr);
423 }
424 return WideValue;
425}
426
427BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
428 auto &CFG = State.CFG;
429 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
430 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
431 BasicBlock *PrevBB = CFG.PrevBB;
432 BasicBlock *NewBB = BasicBlock::Create(Context&: PrevBB->getContext(), Name: getName(),
433 Parent: PrevBB->getParent(), InsertBefore: CFG.ExitBB);
434 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
435
436 return NewBB;
437}
438
439void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
440 auto &CFG = State.CFG;
441 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
442
443 // Register NewBB in its loop. In innermost loops its the same for all
444 // BB's.
445 Loop *ParentLoop = State.CurrentParentLoop;
446 // If this block has a sole successor that is an exit block or is an exit
447 // block itself then it needs adding to the same parent loop as the exit
448 // block.
449 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
450 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
451 if (State.Plan->isExitBlock(VPBB: SuccOrExitVPB)) {
452 ParentLoop = State.LI->getLoopFor(
453 BB: cast<VPIRBasicBlock>(Val: SuccOrExitVPB)->getIRBasicBlock());
454 }
455
456 if (ParentLoop && !State.LI->getLoopFor(BB: NewBB))
457 ParentLoop->addBasicBlockToLoop(NewBB, LI&: *State.LI);
458
459 SmallVector<VPBlockBase *> Preds;
460 if (VPBlockUtils::isHeader(VPB: this, VPDT: State.VPDT)) {
461 // There's no block for the latch yet, connect to the preheader only.
462 Preds = {getPredecessors()[0]};
463 } else {
464 Preds = to_vector(Range&: getPredecessors());
465 }
466
467 // Hook up the new basic block to its predecessors.
468 for (VPBlockBase *PredVPBlock : Preds) {
469 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
470 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
471 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
472 "Predecessor basic-block not found building successor.");
473 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
474 auto *PredBBTerminator = PredBB->getTerminator();
475 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
476
477 auto *TermBr = dyn_cast<BranchInst>(Val: PredBBTerminator);
478 if (isa<UnreachableInst>(Val: PredBBTerminator)) {
479 assert(PredVPSuccessors.size() == 1 &&
480 "Predecessor ending w/o branch must have single successor.");
481 DebugLoc DL = PredBBTerminator->getDebugLoc();
482 PredBBTerminator->eraseFromParent();
483 auto *Br = BranchInst::Create(IfTrue: NewBB, InsertBefore: PredBB);
484 Br->setDebugLoc(DL);
485 } else if (TermBr && !TermBr->isConditional()) {
486 TermBr->setSuccessor(idx: 0, NewSucc: NewBB);
487 } else {
488 // Set each forward successor here when it is created, excluding
489 // backedges. A backward successor is set when the branch is created.
490 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
491 assert((TermBr && (!TermBr->getSuccessor(idx) ||
492 (isa<VPIRBasicBlock>(this) &&
493 TermBr->getSuccessor(idx) == NewBB))) &&
494 "Trying to reset an existing successor block.");
495 TermBr->setSuccessor(idx, NewSucc: NewBB);
496 }
497 CFG.DTU.applyUpdates(Updates: {{DominatorTree::Insert, PredBB, NewBB}});
498 }
499}
500
501void VPIRBasicBlock::execute(VPTransformState *State) {
502 assert(getHierarchicalSuccessors().size() <= 2 &&
503 "VPIRBasicBlock can have at most two successors at the moment!");
504 State->Builder.SetInsertPoint(IRBB->getTerminator());
505 State->CFG.PrevBB = IRBB;
506 State->CFG.VPBB2IRBB[this] = IRBB;
507 executeRecipes(State, BB: IRBB);
508 // Create a branch instruction to terminate IRBB if one was not created yet
509 // and is needed.
510 if (getSingleSuccessor() && isa<UnreachableInst>(Val: IRBB->getTerminator())) {
511 auto *Br = State->Builder.CreateBr(Dest: IRBB);
512 Br->setOperand(i_nocapture: 0, Val_nocapture: nullptr);
513 IRBB->getTerminator()->eraseFromParent();
514 } else {
515 assert(
516 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
517 "other blocks must be terminated by a branch");
518 }
519
520 connectToPredecessors(State&: *State);
521}
522
523VPIRBasicBlock *VPIRBasicBlock::clone() {
524 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
525 for (VPRecipeBase &R : Recipes)
526 NewBlock->appendRecipe(Recipe: R.clone());
527 return NewBlock;
528}
529
530void VPBasicBlock::execute(VPTransformState *State) {
531 bool Replica = bool(State->Lane);
532 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
533
534 if (VPBlockUtils::isHeader(VPB: this, VPDT: State->VPDT)) {
535 // Create and register the new vector loop.
536 Loop *PrevParentLoop = State->CurrentParentLoop;
537 State->CurrentParentLoop = State->LI->AllocateLoop();
538
539 // Insert the new loop into the loop nest and register the new basic blocks
540 // before calling any utilities such as SCEV that require valid LoopInfo.
541 if (PrevParentLoop)
542 PrevParentLoop->addChildLoop(NewChild: State->CurrentParentLoop);
543 else
544 State->LI->addTopLevelLoop(New: State->CurrentParentLoop);
545 }
546
547 auto IsReplicateRegion = [](VPBlockBase *BB) {
548 auto *R = dyn_cast_or_null<VPRegionBlock>(Val: BB);
549 assert((!R || R->isReplicator()) &&
550 "only replicate region blocks should remain");
551 return R;
552 };
553 // 1. Create an IR basic block.
554 if ((Replica && this == getParent()->getEntry()) ||
555 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
556 // Reuse the previous basic block if the current VPBB is either
557 // * the entry to a replicate region, or
558 // * the exit of a replicate region.
559 State->CFG.VPBB2IRBB[this] = NewBB;
560 } else {
561 NewBB = createEmptyBasicBlock(State&: *State);
562
563 State->Builder.SetInsertPoint(NewBB);
564 // Temporarily terminate with unreachable until CFG is rewired.
565 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
566 State->Builder.SetInsertPoint(Terminator);
567
568 State->CFG.PrevBB = NewBB;
569 State->CFG.VPBB2IRBB[this] = NewBB;
570 connectToPredecessors(State&: *State);
571 }
572
573 // 2. Fill the IR basic block with IR instructions.
574 executeRecipes(State, BB: NewBB);
575
576 // If this block is a latch, update CurrentParentLoop.
577 if (VPBlockUtils::isLatch(VPB: this, VPDT: State->VPDT))
578 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
579}
580
581VPBasicBlock *VPBasicBlock::clone() {
582 auto *NewBlock = getPlan()->createVPBasicBlock(Name: getName());
583 for (VPRecipeBase &R : *this)
584 NewBlock->appendRecipe(Recipe: R.clone());
585 return NewBlock;
586}
587
588void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) {
589 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
590 << " in BB: " << BB->getName() << '\n');
591
592 State->CFG.PrevVPBB = this;
593
594 for (VPRecipeBase &Recipe : Recipes) {
595 State->setDebugLocFrom(Recipe.getDebugLoc());
596 Recipe.execute(State&: *State);
597 }
598
599 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
600}
601
602VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
603 assert((SplitAt == end() || SplitAt->getParent() == this) &&
604 "can only split at a position in the same block");
605
606 // Create new empty block after the block to split.
607 auto *SplitBlock = getPlan()->createVPBasicBlock(Name: getName() + ".split");
608 VPBlockUtils::insertBlockAfter(NewBlock: SplitBlock, BlockPtr: this);
609
610 // Finally, move the recipes starting at SplitAt to new block.
611 for (VPRecipeBase &ToMove :
612 make_early_inc_range(Range: make_range(x: SplitAt, y: this->end())))
613 ToMove.moveBefore(BB&: *SplitBlock, I: SplitBlock->end());
614
615 return SplitBlock;
616}
617
618/// Return the enclosing loop region for region \p P. The templated version is
619/// used to support both const and non-const block arguments.
620template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
621 if (P && P->isReplicator()) {
622 P = P->getParent();
623 // Multiple loop regions can be nested, but replicate regions can only be
624 // nested inside a loop region or must be outside any other region.
625 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
626 }
627 return P;
628}
629
630VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
631 return getEnclosingLoopRegionForRegion(P: getParent());
632}
633
634const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
635 return getEnclosingLoopRegionForRegion(P: getParent());
636}
637
638static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
639 if (VPBB->empty()) {
640 assert(
641 VPBB->getNumSuccessors() < 2 &&
642 "block with multiple successors doesn't have a recipe as terminator");
643 return false;
644 }
645
646 const VPRecipeBase *R = &VPBB->back();
647 bool IsSwitch = isa<VPInstruction>(Val: R) &&
648 cast<VPInstruction>(Val: R)->getOpcode() == Instruction::Switch;
649 bool IsCondBranch = isa<VPBranchOnMaskRecipe>(Val: R) ||
650 match(V: R, P: m_BranchOnCond(Op0: m_VPValue())) ||
651 match(V: R, P: m_BranchOnCount(Op0: m_VPValue(), Op1: m_VPValue()));
652 (void)IsCondBranch;
653 (void)IsSwitch;
654 if (VPBB->getNumSuccessors() == 2 ||
655 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
656 assert((IsCondBranch || IsSwitch) &&
657 "block with multiple successors not terminated by "
658 "conditional branch nor switch recipe");
659
660 return true;
661 }
662
663 if (VPBB->getNumSuccessors() > 2) {
664 assert(IsSwitch && "block with more than 2 successors not terminated by "
665 "a switch recipe");
666 return true;
667 }
668
669 assert(
670 !IsCondBranch &&
671 "block with 0 or 1 successors terminated by conditional branch recipe");
672 return false;
673}
674
675VPRecipeBase *VPBasicBlock::getTerminator() {
676 if (hasConditionalTerminator(VPBB: this))
677 return &back();
678 return nullptr;
679}
680
681const VPRecipeBase *VPBasicBlock::getTerminator() const {
682 if (hasConditionalTerminator(VPBB: this))
683 return &back();
684 return nullptr;
685}
686
687bool VPBasicBlock::isExiting() const {
688 return getParent() && getParent()->getExitingBasicBlock() == this;
689}
690
691#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
692void VPBlockBase::print(raw_ostream &O) const {
693 VPSlotTracker SlotTracker(getPlan());
694 print(O, "", SlotTracker);
695}
696
697void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
698 if (getSuccessors().empty()) {
699 O << Indent << "No successors\n";
700 } else {
701 O << Indent << "Successor(s): ";
702 ListSeparator LS;
703 for (auto *Succ : getSuccessors())
704 O << LS << Succ->getName();
705 O << '\n';
706 }
707}
708
709void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
710 VPSlotTracker &SlotTracker) const {
711 O << Indent << getName() << ":\n";
712
713 auto RecipeIndent = Indent + " ";
714 for (const VPRecipeBase &Recipe : *this) {
715 Recipe.print(O, RecipeIndent, SlotTracker);
716 O << '\n';
717 }
718
719 printSuccessors(O, Indent);
720}
721#endif
722
723static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
724
725// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
726// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
727// Remapping of operands must be done separately. Returns a pair with the new
728// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
729// region, return nullptr for the exiting block.
730static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
731 DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
732 VPBlockBase *Exiting = nullptr;
733 bool InRegion = Entry->getParent();
734 // First, clone blocks reachable from Entry.
735 for (VPBlockBase *BB : vp_depth_first_shallow(G: Entry)) {
736 VPBlockBase *NewBB = BB->clone();
737 Old2NewVPBlocks[BB] = NewBB;
738 if (InRegion && BB->getNumSuccessors() == 0) {
739 assert(!Exiting && "Multiple exiting blocks?");
740 Exiting = BB;
741 }
742 }
743 assert((!InRegion || Exiting) && "regions must have a single exiting block");
744
745 // Second, update the predecessors & successors of the cloned blocks.
746 for (VPBlockBase *BB : vp_depth_first_shallow(G: Entry)) {
747 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
748 SmallVector<VPBlockBase *> NewPreds;
749 for (VPBlockBase *Pred : BB->getPredecessors()) {
750 NewPreds.push_back(Elt: Old2NewVPBlocks[Pred]);
751 }
752 NewBB->setPredecessors(NewPreds);
753 SmallVector<VPBlockBase *> NewSuccs;
754 for (VPBlockBase *Succ : BB->successors()) {
755 NewSuccs.push_back(Elt: Old2NewVPBlocks[Succ]);
756 }
757 NewBB->setSuccessors(NewSuccs);
758 }
759
760#if !defined(NDEBUG)
761 // Verify that the order of predecessors and successors matches in the cloned
762 // version.
763 for (const auto &[OldBB, NewBB] :
764 zip(vp_depth_first_shallow(Entry),
765 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
766 for (const auto &[OldPred, NewPred] :
767 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
768 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
769
770 for (const auto &[OldSucc, NewSucc] :
771 zip(OldBB->successors(), NewBB->successors()))
772 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
773 }
774#endif
775
776 return std::make_pair(x&: Old2NewVPBlocks[Entry],
777 y: Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
778}
779
780VPRegionBlock *VPRegionBlock::clone() {
781 const auto &[NewEntry, NewExiting] = cloneFrom(Entry: getEntry());
782 auto *NewRegion = getPlan()->createVPRegionBlock(Entry: NewEntry, Exiting: NewExiting,
783 Name: getName(), IsReplicator: isReplicator());
784 for (VPBlockBase *Block : vp_depth_first_shallow(G: NewEntry))
785 Block->setParent(NewRegion);
786 return NewRegion;
787}
788
789void VPRegionBlock::execute(VPTransformState *State) {
790 assert(isReplicator() &&
791 "Loop regions should have been lowered to plain CFG");
792 assert(!State->Lane && "Replicating a Region with non-null instance.");
793 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
794
795 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
796 Entry);
797 State->Lane = VPLane(0);
798 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
799 State->Lane = VPLane(Lane, VPLane::Kind::First);
800 // Visit the VPBlocks connected to \p this, starting from it.
801 for (VPBlockBase *Block : RPOT) {
802 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
803 Block->execute(State);
804 }
805 }
806
807 // Exit replicating mode.
808 State->Lane.reset();
809}
810
811InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
812 InstructionCost Cost = 0;
813 for (VPRecipeBase &R : Recipes)
814 Cost += R.cost(VF, Ctx);
815 return Cost;
816}
817
818const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
819 const VPBlockBase *Pred = nullptr;
820 if (getNumPredecessors() > 0) {
821 Pred = getPredecessors()[Idx];
822 } else {
823 auto *Region = getParent();
824 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
825 "must be in the entry block of a non-replicate region");
826 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
827 "loop region has a single predecessor (preheader), its entry block "
828 "has 2 incoming blocks");
829
830 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
831 // region itself whose exiting block feeds the phi across the backedge.
832 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
833 }
834 return Pred->getExitingBasicBlock();
835}
836
837InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
838 if (!isReplicator()) {
839 InstructionCost Cost = 0;
840 for (VPBlockBase *Block : vp_depth_first_shallow(G: getEntry()))
841 Cost += Block->cost(VF, Ctx);
842 InstructionCost BackedgeCost =
843 ForceTargetInstructionCost.getNumOccurrences()
844 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
845 : Ctx.TTI.getCFInstrCost(Opcode: Instruction::Br, CostKind: Ctx.CostKind);
846 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
847 << ": vector loop backedge\n");
848 Cost += BackedgeCost;
849 return Cost;
850 }
851
852 // Compute the cost of a replicate region. Replicating isn't supported for
853 // scalable vectors, return an invalid cost for them.
854 // TODO: Discard scalable VPlans with replicate recipes earlier after
855 // construction.
856 if (VF.isScalable())
857 return InstructionCost::getInvalid();
858
859 // First compute the cost of the conditionally executed recipes, followed by
860 // account for the branching cost, except if the mask is a header mask or
861 // uniform condition.
862 using namespace llvm::VPlanPatternMatch;
863 VPBasicBlock *Then = cast<VPBasicBlock>(Val: getEntry()->getSuccessors()[0]);
864 InstructionCost ThenCost = Then->cost(VF, Ctx);
865
866 // For the scalar case, we may not always execute the original predicated
867 // block, Thus, scale the block's cost by the probability of executing it.
868 if (VF.isScalar())
869 return ThenCost / getPredBlockCostDivisor(CostKind: Ctx.CostKind);
870
871 return ThenCost;
872}
873
874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
875void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
876 VPSlotTracker &SlotTracker) const {
877 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
878 auto NewIndent = Indent + " ";
879 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
880 O << '\n';
881 BlockBase->print(O, NewIndent, SlotTracker);
882 }
883 O << Indent << "}\n";
884
885 printSuccessors(O, Indent);
886}
887#endif
888
889void VPRegionBlock::dissolveToCFGLoop() {
890 auto *Header = cast<VPBasicBlock>(Val: getEntry());
891 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(Val: &Header->front())) {
892 assert(this == getPlan()->getVectorLoopRegion() &&
893 "Canonical IV must be in the entry of the top-level loop region");
894 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
895 IncomingValues: {CanIV->getStartValue(), CanIV->getBackedgeValue()},
896 DL: CanIV->getDebugLoc(), Name: "index");
897 CanIV->replaceAllUsesWith(New: ScalarR);
898 CanIV->eraseFromParent();
899 }
900
901 VPBlockBase *Preheader = getSinglePredecessor();
902 auto *ExitingLatch = cast<VPBasicBlock>(Val: getExiting());
903 VPBlockBase *Middle = getSingleSuccessor();
904 VPBlockUtils::disconnectBlocks(From: Preheader, To: this);
905 VPBlockUtils::disconnectBlocks(From: this, To: Middle);
906
907 for (VPBlockBase *VPB : vp_depth_first_shallow(G: Entry))
908 VPB->setParent(getParent());
909
910 VPBlockUtils::connectBlocks(From: Preheader, To: Header);
911 VPBlockUtils::connectBlocks(From: ExitingLatch, To: Middle);
912 VPBlockUtils::connectBlocks(From: ExitingLatch, To: Header);
913}
914
915VPlan::VPlan(Loop *L) {
916 setEntry(createVPIRBasicBlock(IRBB: L->getLoopPreheader()));
917 ScalarHeader = createVPIRBasicBlock(IRBB: L->getHeader());
918
919 SmallVector<BasicBlock *> IRExitBlocks;
920 L->getUniqueExitBlocks(ExitBlocks&: IRExitBlocks);
921 for (BasicBlock *EB : IRExitBlocks)
922 ExitBlocks.push_back(Elt: createVPIRBasicBlock(IRBB: EB));
923}
924
925VPlan::~VPlan() {
926 VPValue DummyValue;
927
928 for (auto *VPB : CreatedBlocks) {
929 if (auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB)) {
930 // Replace all operands of recipes and all VPValues defined in VPBB with
931 // DummyValue so the block can be deleted.
932 for (VPRecipeBase &R : *VPBB) {
933 for (auto *Def : R.definedValues())
934 Def->replaceAllUsesWith(New: &DummyValue);
935
936 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
937 R.setOperand(I, New: &DummyValue);
938 }
939 }
940 delete VPB;
941 }
942 for (VPValue *VPV : getLiveIns())
943 delete VPV;
944 if (BackedgeTakenCount)
945 delete BackedgeTakenCount;
946}
947
948void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
949 VPTransformState &State) {
950 Type *TCTy = TripCountV->getType();
951 // Check if the backedge taken count is needed, and if so build it.
952 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
953 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
954 auto *TCMO = Builder.CreateSub(LHS: TripCountV, RHS: ConstantInt::get(Ty: TCTy, V: 1),
955 Name: "trip.count.minus.1");
956 BackedgeTakenCount->setUnderlyingValue(TCMO);
957 }
958
959 VectorTripCount.setUnderlyingValue(VectorTripCountV);
960
961 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
962 // FIXME: Model VF * UF computation completely in VPlan.
963 unsigned UF = getUF();
964 if (VF.getNumUsers()) {
965 Value *RuntimeVF = getRuntimeVF(B&: Builder, Ty: TCTy, VF: State.VF);
966 VF.setUnderlyingValue(RuntimeVF);
967 VFxUF.setUnderlyingValue(
968 UF > 1 ? Builder.CreateMul(LHS: RuntimeVF, RHS: ConstantInt::get(Ty: TCTy, V: UF))
969 : RuntimeVF);
970 } else {
971 VFxUF.setUnderlyingValue(createStepForVF(B&: Builder, Ty: TCTy, VF: State.VF, Step: UF));
972 }
973}
974
975VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
976 auto Iter = find_if(Range: getExitBlocks(), P: [IRBB](const VPIRBasicBlock *VPIRBB) {
977 return VPIRBB->getIRBasicBlock() == IRBB;
978 });
979 assert(Iter != getExitBlocks().end() && "no exit block found");
980 return *Iter;
981}
982
983bool VPlan::isExitBlock(VPBlockBase *VPBB) {
984 return is_contained(Range&: ExitBlocks, Element: VPBB);
985}
986
987/// Generate the code inside the preheader and body of the vectorized loop.
988/// Assumes a single pre-header basic-block was created for this. Introduce
989/// additional basic-blocks as needed, and fill them all.
990void VPlan::execute(VPTransformState *State) {
991 // Initialize CFG state.
992 State->CFG.PrevVPBB = nullptr;
993 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
994
995 // Update VPDominatorTree since VPBasicBlock may be removed after State was
996 // constructed.
997 State->VPDT.recalculate(Func&: *this);
998
999 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1000 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
1001 cast<BranchInst>(Val: VectorPreHeader->getTerminator())->setSuccessor(idx: 0, NewSucc: nullptr);
1002 State->CFG.DTU.applyUpdates(
1003 Updates: {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
1004
1005 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
1006 << ", UF=" << getUF() << '\n');
1007 setName("Final VPlan");
1008 LLVM_DEBUG(dump());
1009
1010 // Disconnect scalar preheader and scalar header, as the dominator tree edge
1011 // will be updated as part of VPlan execution. This allows keeping the DTU
1012 // logic generic during VPlan execution.
1013 BasicBlock *ScalarPh = State->CFG.ExitBB;
1014 State->CFG.DTU.applyUpdates(
1015 Updates: {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
1016
1017 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
1018 Entry);
1019 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
1020 // successor blocks including the middle, exit and scalar preheader blocks.
1021 for (VPBlockBase *Block : RPOT)
1022 Block->execute(State);
1023
1024 State->CFG.DTU.flush();
1025
1026 VPBasicBlock *Header = vputils::getFirstLoopHeader(Plan&: *this, VPDT&: State->VPDT);
1027 if (!Header)
1028 return;
1029
1030 auto *LatchVPBB = cast<VPBasicBlock>(Val: Header->getPredecessors()[1]);
1031 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1032
1033 // Fix the latch value of canonical, reduction and first-order recurrences
1034 // phis in the vector loop.
1035 for (VPRecipeBase &R : Header->phis()) {
1036 // Skip phi-like recipes that generate their backedege values themselves.
1037 if (isa<VPWidenPHIRecipe>(Val: &R))
1038 continue;
1039
1040 if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(Val: &R)) {
1041 assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1042 "recipe generating only scalars should have been replaced");
1043 auto *GEP = cast<GetElementPtrInst>(Val: State->get(Def: WidenPhi));
1044 PHINode *Phi = cast<PHINode>(Val: GEP->getPointerOperand());
1045
1046 Phi->setIncomingBlock(i: 1, BB: VectorLatchBB);
1047
1048 // Move the last step to the end of the latch block. This ensures
1049 // consistent placement of all induction updates.
1050 Instruction *Inc = cast<Instruction>(Val: Phi->getIncomingValue(i: 1));
1051 Inc->moveBefore(InsertPos: std::prev(x: VectorLatchBB->getTerminator()->getIterator()));
1052 continue;
1053 }
1054
1055 auto *PhiR = cast<VPSingleDefRecipe>(Val: &R);
1056 // VPInstructions currently model scalar Phis only.
1057 bool NeedsScalar = isa<VPInstruction>(Val: PhiR) ||
1058 (isa<VPReductionPHIRecipe>(Val: PhiR) &&
1059 cast<VPReductionPHIRecipe>(Val: PhiR)->isInLoop());
1060
1061 Value *Phi = State->get(Def: PhiR, NeedsScalar);
1062 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1063 // not.
1064 Value *Val = State->get(Def: PhiR->getOperand(N: 1), NeedsScalar);
1065 cast<PHINode>(Val: Phi)->addIncoming(V: Val, BB: VectorLatchBB);
1066 }
1067}
1068
1069InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
1070 // For now only return the cost of the vector loop region, ignoring any other
1071 // blocks, like the preheader or middle blocks.
1072 InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
1073
1074 // If any instructions in the middle block are invalid return invalid.
1075 // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
1076 if (!getMiddleBlock()->cost(VF, Ctx).isValid())
1077 return InstructionCost::getInvalid();
1078
1079 return Cost;
1080}
1081
1082VPRegionBlock *VPlan::getVectorLoopRegion() {
1083 // TODO: Cache if possible.
1084 for (VPBlockBase *B : vp_depth_first_shallow(G: getEntry()))
1085 if (auto *R = dyn_cast<VPRegionBlock>(Val: B))
1086 return R->isReplicator() ? nullptr : R;
1087 return nullptr;
1088}
1089
1090const VPRegionBlock *VPlan::getVectorLoopRegion() const {
1091 for (const VPBlockBase *B : vp_depth_first_shallow(G: getEntry()))
1092 if (auto *R = dyn_cast<VPRegionBlock>(Val: B))
1093 return R->isReplicator() ? nullptr : R;
1094 return nullptr;
1095}
1096
1097#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1098void VPlan::printLiveIns(raw_ostream &O) const {
1099 VPSlotTracker SlotTracker(this);
1100
1101 if (VF.getNumUsers() > 0) {
1102 O << "\nLive-in ";
1103 VF.printAsOperand(O, SlotTracker);
1104 O << " = VF";
1105 }
1106
1107 if (VFxUF.getNumUsers() > 0) {
1108 O << "\nLive-in ";
1109 VFxUF.printAsOperand(O, SlotTracker);
1110 O << " = VF * UF";
1111 }
1112
1113 if (VectorTripCount.getNumUsers() > 0) {
1114 O << "\nLive-in ";
1115 VectorTripCount.printAsOperand(O, SlotTracker);
1116 O << " = vector-trip-count";
1117 }
1118
1119 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1120 O << "\nLive-in ";
1121 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1122 O << " = backedge-taken count";
1123 }
1124
1125 O << "\n";
1126 if (TripCount) {
1127 if (TripCount->isLiveIn())
1128 O << "Live-in ";
1129 TripCount->printAsOperand(O, SlotTracker);
1130 O << " = original trip-count";
1131 O << "\n";
1132 }
1133}
1134
1135LLVM_DUMP_METHOD
1136void VPlan::print(raw_ostream &O) const {
1137 VPSlotTracker SlotTracker(this);
1138
1139 O << "VPlan '" << getName() << "' {";
1140
1141 printLiveIns(O);
1142
1143 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
1144 RPOT(getEntry());
1145 for (const VPBlockBase *Block : RPOT) {
1146 O << '\n';
1147 Block->print(O, "", SlotTracker);
1148 }
1149
1150 O << "}\n";
1151}
1152
1153std::string VPlan::getName() const {
1154 std::string Out;
1155 raw_string_ostream RSO(Out);
1156 RSO << Name << " for ";
1157 if (!VFs.empty()) {
1158 RSO << "VF={" << VFs[0];
1159 for (ElementCount VF : drop_begin(VFs))
1160 RSO << "," << VF;
1161 RSO << "},";
1162 }
1163
1164 if (UFs.empty()) {
1165 RSO << "UF>=1";
1166 } else {
1167 RSO << "UF={" << UFs[0];
1168 for (unsigned UF : drop_begin(UFs))
1169 RSO << "," << UF;
1170 RSO << "}";
1171 }
1172
1173 return Out;
1174}
1175
1176LLVM_DUMP_METHOD
1177void VPlan::printDOT(raw_ostream &O) const {
1178 VPlanPrinter Printer(O, *this);
1179 Printer.dump();
1180}
1181
1182LLVM_DUMP_METHOD
1183void VPlan::dump() const { print(dbgs()); }
1184#endif
1185
1186static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1187 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1188 // Update the operands of all cloned recipes starting at NewEntry. This
1189 // traverses all reachable blocks. This is done in two steps, to handle cycles
1190 // in PHI recipes.
1191 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1192 OldDeepRPOT(Entry);
1193 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1194 NewDeepRPOT(NewEntry);
1195 // First, collect all mappings from old to new VPValues defined by cloned
1196 // recipes.
1197 for (const auto &[OldBB, NewBB] :
1198 zip(t: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: OldDeepRPOT),
1199 u: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: NewDeepRPOT))) {
1200 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1201 "blocks must have the same number of recipes");
1202 for (const auto &[OldR, NewR] : zip(t&: *OldBB, u&: *NewBB)) {
1203 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1204 "recipes must have the same number of operands");
1205 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1206 "recipes must define the same number of operands");
1207 for (const auto &[OldV, NewV] :
1208 zip(t: OldR.definedValues(), u: NewR.definedValues()))
1209 Old2NewVPValues[OldV] = NewV;
1210 }
1211 }
1212
1213 // Update all operands to use cloned VPValues.
1214 for (VPBasicBlock *NewBB :
1215 VPBlockUtils::blocksOnly<VPBasicBlock>(Range: NewDeepRPOT)) {
1216 for (VPRecipeBase &NewR : *NewBB)
1217 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1218 VPValue *NewOp = Old2NewVPValues.lookup(Val: NewR.getOperand(N: I));
1219 NewR.setOperand(I, New: NewOp);
1220 }
1221 }
1222}
1223
1224VPlan *VPlan::duplicate() {
1225 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1226 // Clone blocks.
1227 const auto &[NewEntry, __] = cloneFrom(Entry);
1228
1229 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1230 VPIRBasicBlock *NewScalarHeader = nullptr;
1231 if (getScalarHeader()->getNumPredecessors() == 0) {
1232 NewScalarHeader = createVPIRBasicBlock(IRBB: ScalarHeaderIRBB);
1233 } else {
1234 NewScalarHeader = cast<VPIRBasicBlock>(Val: *find_if(
1235 Range: vp_depth_first_shallow(G: NewEntry), P: [ScalarHeaderIRBB](VPBlockBase *VPB) {
1236 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(Val: VPB);
1237 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1238 }));
1239 }
1240 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1241 auto *NewPlan = new VPlan(cast<VPBasicBlock>(Val: NewEntry), NewScalarHeader);
1242 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1243 for (VPValue *OldLiveIn : getLiveIns()) {
1244 Old2NewVPValues[OldLiveIn] =
1245 NewPlan->getOrAddLiveIn(V: OldLiveIn->getLiveInIRValue());
1246 }
1247 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1248 Old2NewVPValues[&VF] = &NewPlan->VF;
1249 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1250 if (BackedgeTakenCount) {
1251 NewPlan->BackedgeTakenCount = new VPValue();
1252 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1253 }
1254 if (TripCount && TripCount->isLiveIn())
1255 Old2NewVPValues[TripCount] =
1256 NewPlan->getOrAddLiveIn(V: TripCount->getLiveInIRValue());
1257 // else NewTripCount will be created and inserted into Old2NewVPValues when
1258 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1259
1260 remapOperands(Entry, NewEntry, Old2NewVPValues);
1261
1262 // Initialize remaining fields of cloned VPlan.
1263 NewPlan->VFs = VFs;
1264 NewPlan->UFs = UFs;
1265 // TODO: Adjust names.
1266 NewPlan->Name = Name;
1267 if (TripCount) {
1268 assert(Old2NewVPValues.contains(TripCount) &&
1269 "TripCount must have been added to Old2NewVPValues");
1270 NewPlan->TripCount = Old2NewVPValues[TripCount];
1271 }
1272
1273 // Transfer all cloned blocks (the second half of all current blocks) from
1274 // current to new VPlan.
1275 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1276 for (unsigned I :
1277 seq<unsigned>(Begin: NumBlocksBeforeCloning, End: NumBlocksAfterCloning))
1278 NewPlan->CreatedBlocks.push_back(Elt: this->CreatedBlocks[I]);
1279 CreatedBlocks.truncate(N: NumBlocksBeforeCloning);
1280
1281 // Update ExitBlocks of the new plan.
1282 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1283 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(Val: VPB) &&
1284 VPB != NewScalarHeader)
1285 NewPlan->ExitBlocks.push_back(Elt: cast<VPIRBasicBlock>(Val: VPB));
1286 }
1287
1288 return NewPlan;
1289}
1290
1291VPIRBasicBlock *VPlan::createEmptyVPIRBasicBlock(BasicBlock *IRBB) {
1292 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1293 CreatedBlocks.push_back(Elt: VPIRBB);
1294 return VPIRBB;
1295}
1296
1297VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) {
1298 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1299 for (Instruction &I :
1300 make_range(x: IRBB->begin(), y: IRBB->getTerminator()->getIterator()))
1301 VPIRBB->appendRecipe(Recipe: VPIRInstruction::create(I));
1302 return VPIRBB;
1303}
1304
1305#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1306
1307Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1308 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1309 Twine(getOrCreateBID(Block));
1310}
1311
1312Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1313 const std::string &Name = Block->getName();
1314 if (!Name.empty())
1315 return Name;
1316 return "VPB" + Twine(getOrCreateBID(Block));
1317}
1318
1319void VPlanPrinter::dump() {
1320 Depth = 1;
1321 bumpIndent(0);
1322 OS << "digraph VPlan {\n";
1323 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1324 if (!Plan.getName().empty())
1325 OS << "\\n" << DOT::EscapeString(Plan.getName());
1326
1327 {
1328 // Print live-ins.
1329 std::string Str;
1330 raw_string_ostream SS(Str);
1331 Plan.printLiveIns(SS);
1332 SmallVector<StringRef, 0> Lines;
1333 StringRef(Str).rtrim('\n').split(Lines, "\n");
1334 for (auto Line : Lines)
1335 OS << DOT::EscapeString(Line.str()) << "\\n";
1336 }
1337
1338 OS << "\"]\n";
1339 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1340 OS << "edge [fontname=Courier, fontsize=30]\n";
1341 OS << "compound=true\n";
1342
1343 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1344 dumpBlock(Block);
1345
1346 OS << "}\n";
1347}
1348
1349void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1350 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1351 dumpBasicBlock(BasicBlock);
1352 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1353 dumpRegion(Region);
1354 else
1355 llvm_unreachable("Unsupported kind of VPBlock.");
1356}
1357
1358void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1359 bool Hidden, const Twine &Label) {
1360 // Due to "dot" we print an edge between two regions as an edge between the
1361 // exiting basic block and the entry basic of the respective regions.
1362 const VPBlockBase *Tail = From->getExitingBasicBlock();
1363 const VPBlockBase *Head = To->getEntryBasicBlock();
1364 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1365 OS << " [ label=\"" << Label << '\"';
1366 if (Tail != From)
1367 OS << " ltail=" << getUID(From);
1368 if (Head != To)
1369 OS << " lhead=" << getUID(To);
1370 if (Hidden)
1371 OS << "; splines=none";
1372 OS << "]\n";
1373}
1374
1375void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1376 auto &Successors = Block->getSuccessors();
1377 if (Successors.size() == 1)
1378 drawEdge(Block, Successors.front(), false, "");
1379 else if (Successors.size() == 2) {
1380 drawEdge(Block, Successors.front(), false, "T");
1381 drawEdge(Block, Successors.back(), false, "F");
1382 } else {
1383 unsigned SuccessorNumber = 0;
1384 for (auto *Successor : Successors)
1385 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1386 }
1387}
1388
1389void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1390 // Implement dot-formatted dump by performing plain-text dump into the
1391 // temporary storage followed by some post-processing.
1392 OS << Indent << getUID(BasicBlock) << " [label =\n";
1393 bumpIndent(1);
1394 std::string Str;
1395 raw_string_ostream SS(Str);
1396 // Use no indentation as we need to wrap the lines into quotes ourselves.
1397 BasicBlock->print(SS, "", SlotTracker);
1398
1399 // We need to process each line of the output separately, so split
1400 // single-string plain-text dump.
1401 SmallVector<StringRef, 0> Lines;
1402 StringRef(Str).rtrim('\n').split(Lines, "\n");
1403
1404 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1405 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1406 };
1407
1408 // Don't need the "+" after the last line.
1409 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1410 EmitLine(Line, " +\n");
1411 EmitLine(Lines.back(), "\n");
1412
1413 bumpIndent(-1);
1414 OS << Indent << "]\n";
1415
1416 dumpEdges(BasicBlock);
1417}
1418
1419void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1420 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1421 bumpIndent(1);
1422 OS << Indent << "fontname=Courier\n"
1423 << Indent << "label=\""
1424 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1425 << DOT::EscapeString(Region->getName()) << "\"\n";
1426 // Dump the blocks of the region.
1427 assert(Region->getEntry() && "Region contains no inner blocks.");
1428 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1429 dumpBlock(Block);
1430 bumpIndent(-1);
1431 OS << Indent << "}\n";
1432 dumpEdges(Region);
1433}
1434
1435#endif
1436
1437/// Returns true if there is a vector loop region and \p VPV is defined in a
1438/// loop region.
1439static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1440 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1441 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1442 DefR->getParent()->getEnclosingLoopRegion());
1443}
1444
1445bool VPValue::isDefinedOutsideLoopRegions() const {
1446 return !isDefinedInsideLoopRegions(VPV: this);
1447}
1448void VPValue::replaceAllUsesWith(VPValue *New) {
1449 replaceUsesWithIf(New, ShouldReplace: [](VPUser &, unsigned) { return true; });
1450}
1451
1452void VPValue::replaceUsesWithIf(
1453 VPValue *New,
1454 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1455 // Note that this early exit is required for correctness; the implementation
1456 // below relies on the number of users for this VPValue to decrease, which
1457 // isn't the case if this == New.
1458 if (this == New)
1459 return;
1460
1461 for (unsigned J = 0; J < getNumUsers();) {
1462 VPUser *User = Users[J];
1463 bool RemovedUser = false;
1464 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1465 if (User->getOperand(N: I) != this || !ShouldReplace(*User, I))
1466 continue;
1467
1468 RemovedUser = true;
1469 User->setOperand(I, New);
1470 }
1471 // If a user got removed after updating the current user, the next user to
1472 // update will be moved to the current position, so we only need to
1473 // increment the index if the number of users did not change.
1474 if (!RemovedUser)
1475 J++;
1476 }
1477}
1478
1479void VPUser::replaceUsesOfWith(VPValue *From, VPValue *To) {
1480 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1481 if (getOperand(N: Idx) == From)
1482 setOperand(I: Idx, New: To);
1483 }
1484}
1485
1486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1487void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1488 OS << Tracker.getOrCreateName(this);
1489}
1490
1491void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1492 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1493 Op->printAsOperand(O, SlotTracker);
1494 });
1495}
1496#endif
1497
1498void VPSlotTracker::assignName(const VPValue *V) {
1499 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1500 auto *UV = V->getUnderlyingValue();
1501 auto *VPI = dyn_cast_or_null<VPInstruction>(Val: V->getDefiningRecipe());
1502 if (!UV && !(VPI && !VPI->getName().empty())) {
1503 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1504 NextSlot++;
1505 return;
1506 }
1507
1508 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1509 // appending ".Number" to the name if there are multiple uses.
1510 std::string Name;
1511 if (UV)
1512 Name = getName(V: UV);
1513 else
1514 Name = VPI->getName();
1515
1516 assert(!Name.empty() && "Name cannot be empty.");
1517 StringRef Prefix = UV ? "ir<" : "vp<%";
1518 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1519
1520 // First assign the base name for V.
1521 const auto &[A, _] = VPValue2Name.insert(KV: {V, BaseName});
1522 // Integer or FP constants with different types will result in he same string
1523 // due to stripping types.
1524 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(Val: UV))
1525 return;
1526
1527 // If it is already used by C > 0 other VPValues, increase the version counter
1528 // C and use it for V.
1529 const auto &[C, UseInserted] = BaseName2Version.insert(KV: {BaseName, 0});
1530 if (!UseInserted) {
1531 C->second++;
1532 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1533 }
1534}
1535
1536void VPSlotTracker::assignNames(const VPlan &Plan) {
1537 if (Plan.VF.getNumUsers() > 0)
1538 assignName(V: &Plan.VF);
1539 if (Plan.VFxUF.getNumUsers() > 0)
1540 assignName(V: &Plan.VFxUF);
1541 assignName(V: &Plan.VectorTripCount);
1542 if (Plan.BackedgeTakenCount)
1543 assignName(V: Plan.BackedgeTakenCount);
1544 for (VPValue *LI : Plan.getLiveIns())
1545 assignName(V: LI);
1546
1547 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1548 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1549 for (const VPBasicBlock *VPBB :
1550 VPBlockUtils::blocksOnly<const VPBasicBlock>(Range: RPOT))
1551 assignNames(VPBB);
1552}
1553
1554void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1555 for (const VPRecipeBase &Recipe : *VPBB)
1556 for (VPValue *Def : Recipe.definedValues())
1557 assignName(V: Def);
1558}
1559
1560std::string VPSlotTracker::getName(const Value *V) {
1561 std::string Name;
1562 raw_string_ostream S(Name);
1563 if (V->hasName() || !isa<Instruction>(Val: V)) {
1564 V->printAsOperand(O&: S, PrintType: false);
1565 return Name;
1566 }
1567
1568 if (!MST) {
1569 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1570 // instruction.
1571 auto *I = cast<Instruction>(Val: V);
1572 // This check is required to support unit tests with incomplete IR.
1573 if (I->getParent()) {
1574 MST = std::make_unique<ModuleSlotTracker>(args: I->getModule());
1575 MST->incorporateFunction(F: *I->getFunction());
1576 } else {
1577 MST = std::make_unique<ModuleSlotTracker>(args: nullptr);
1578 }
1579 }
1580 V->printAsOperand(O&: S, PrintType: false, MST&: *MST);
1581 return Name;
1582}
1583
1584std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1585 std::string Name = VPValue2Name.lookup(Val: V);
1586 if (!Name.empty())
1587 return Name;
1588
1589 // If no name was assigned, no VPlan was provided when creating the slot
1590 // tracker or it is not reachable from the provided VPlan. This can happen,
1591 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1592 // in a debugger.
1593 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1594 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1595 // here.
1596 const VPRecipeBase *DefR = V->getDefiningRecipe();
1597 (void)DefR;
1598 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1599 "VPValue defined by a recipe in a VPlan?");
1600
1601 // Use the underlying value's name, if there is one.
1602 if (auto *UV = V->getUnderlyingValue()) {
1603 std::string Name;
1604 raw_string_ostream S(Name);
1605 UV->printAsOperand(O&: S, PrintType: false);
1606 return (Twine("ir<") + Name + ">").str();
1607 }
1608
1609 return "<badref>";
1610}
1611
1612bool LoopVectorizationPlanner::getDecisionAndClampRange(
1613 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1614 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1615 bool PredicateAtRangeStart = Predicate(Range.Start);
1616
1617 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1618 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1619 Range.End = TmpVF;
1620 break;
1621 }
1622
1623 return PredicateAtRangeStart;
1624}
1625
1626/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1627/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1628/// of VF's starting at a given VF and extending it as much as possible. Each
1629/// vectorization decision can potentially shorten this sub-range during
1630/// buildVPlan().
1631void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
1632 ElementCount MaxVF) {
1633 auto MaxVFTimes2 = MaxVF * 2;
1634 for (ElementCount VF = MinVF; ElementCount::isKnownLT(LHS: VF, RHS: MaxVFTimes2);) {
1635 VFRange SubRange = {VF, MaxVFTimes2};
1636 if (auto Plan = tryToBuildVPlan(Range&: SubRange)) {
1637 VPlanTransforms::optimize(Plan&: *Plan);
1638 // Update the name of the latch of the top-level vector loop region region
1639 // after optimizations which includes block folding.
1640 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1641 VPlans.push_back(Elt: std::move(Plan));
1642 }
1643 VF = SubRange.End;
1644 }
1645}
1646
1647VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
1648 assert(count_if(VPlans,
1649 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1650 1 &&
1651 "Multiple VPlans for VF.");
1652
1653 for (const VPlanPtr &Plan : VPlans) {
1654 if (Plan->hasVF(VF))
1655 return *Plan.get();
1656 }
1657 llvm_unreachable("No plan found!");
1658}
1659
1660#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1661void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
1662 if (VPlans.empty()) {
1663 O << "LV: No VPlans built.\n";
1664 return;
1665 }
1666 for (const auto &Plan : VPlans)
1667 if (PrintVPlansInDotFormat)
1668 Plan->printDOT(O);
1669 else
1670 Plan->print(O);
1671}
1672#endif
1673
1674TargetTransformInfo::OperandValueInfo
1675VPCostContext::getOperandInfo(VPValue *V) const {
1676 if (!V->isLiveIn())
1677 return {};
1678
1679 return TTI::getOperandInfo(V: V->getLiveInIRValue());
1680}
1681