//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling for VPlans.
///
//===----------------------------------------------------------------------===//

#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;

namespace {

/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
/// the unrolling transformation, where the original VPValues are retained for
/// part zero.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Unrolling may create recipes that should not be unrolled themselves.
  /// Those are tracked in ToSkip.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  // Associate with each VPValue of part 0 its unrolled instances of parts 1,
  // ..., UF-1.
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
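  // For example, with UF = 3, a value %v defined in part 0 and cloned as %v.1
  // and %v.2 is tracked as VPV2Parts[%v] = {%v.1, %v.2}; getValueForPart(%v, 2)
  // then returns %v.2 (names illustrative).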

  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
  /// all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
  /// depends on the concrete header phi. Inserts newly created recipes at \p
  /// InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
  /// the induction steps for each part.
  void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
    return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
  }

public:
  UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
      : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

  void unrollBlock(VPBlockBase *VPB);

  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  /// Given a single original recipe \p OrigR (of part zero), and its copy \p
  /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
  /// corresponding VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      auto Ins = VPV2Parts.insert({VPV, {}});
      assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
      Ins.first->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  /// Given a uniform recipe \p R, add it for all parts.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    auto Ins = VPV2Parts.insert({R, {}});
    assert(Ins.second && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      Ins.first->second.push_back(R);
  }

  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }

  /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
  /// \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update \p R's operands with their corresponding VPValues for part
  /// \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace

void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
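  // E.g., with UF = 3 the two copies are inserted before the region's single
  // successor S, giving the block order VPR -> VPR.1 -> VPR.2 -> S, with each
  // copy's recipes remapped to the values of its part (names illustrative).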
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
          ScalarIVSteps->addOperand(getConstantVPV(Part));
        }

        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}

void UnrollState::unrollWidenInductionByUF(
    VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  VPIRFlags Flags;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    Flags = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *ScalarStep = IV->getStepValue();
  VPBuilder Builder(PH);
  VPInstruction *VectorStep = Builder.createNaryOp(
      VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
      IV->getDebugLoc());

  ToSkip.insert(VectorStep);

  // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
  // remains the header phi. Parts > 0 are computed by adding Step to the
  // previous part. The header phi recipe will get 2 new operands: the step
  // value for a single part and the last part, used to compute the backedge
  // value during VPWidenIntOrFpInductionRecipe::execute.
  //   %Part.0 = VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep,
  //             %Part.3
  //   %Part.1 = %Part.0 + %VectorStep
  //   %Part.2 = %Part.1 + %VectorStep
  //   %Part.3 = %Part.2 + %VectorStep
  //
  // The newly added recipes are added to ToSkip to avoid interleaving them
  // again.
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc =
      IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc,
                                              {
                                                  Prev,
                                                  VectorStep,
                                              },
                                              Flags, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}

void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their header
  // phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      // If the start value is a ReductionStartVector, use the identity value
      // (second operand) for unrolled parts. If the scaling factor is > 1,
      // create a new ReductionStartVector with the scale factor and both
      // operands set to the identity value.
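      // E.g., for an integer add reduction with start value %s, part 0 keeps
      // the original ReductionStartVector(%s, 0, ...), while parts 1..UF-1
      // all start from the identity value 0 (or from a cloned
      // ReductionStartVector whose value operands are both 0 when the scale
      // factor is > 1). Names illustrative.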
      if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
        assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
               "unexpected start VPInstruction");
        if (Part != 1)
          continue;
        VPValue *StartV;
        if (match(VPI->getOperand(2), m_SpecificInt(1))) {
          StartV = VPI->getOperand(1);
        } else {
          auto *C = VPI->clone();
          C->setOperand(0, C->getOperand(1));
          C->insertAfter(VPI);
          StartV = C;
        }
        for (unsigned Part = 1; Part != UF; ++Part)
          VPV2Parts[VPI][Part - 1] = StartV;
      }
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}

/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  if (match(&R, m_BranchOnCond(m_VPValue())) ||
      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
        addUniformForAllParts(RepR);
        return;
      }
    }
  }

  // Unroll non-uniform recipes.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

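    // A FirstOrderRecurrenceSplice for part I combines the spliced operand's
    // value from part I-1 with its value from part I; only part 0 reads the
    // recurrence phi itself.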
    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
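    // Ordered reductions stay in order by chaining the parts: each part's
    // reduction consumes the previous part's result, and the phi's backedge
    // operand is updated to the latest part's result.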
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi && Phi->isOrdered()) {
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add operand indicating the part to generate code for, to recipes still
    // requiring it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
        match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                        m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}

void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse blocks in region in RPO to ensure defs are visited before uses
    // across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Add all VPValues for all parts to AnyOf, FirstActiveLane and
    // Compute*Result, which combine all parts to compute the final value.
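    // E.g., with UF = 2 a ComputeReductionResult that took the part-0
    // reduction value also gets the part-1 value appended as an extra operand.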
    VPValue *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
                      m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    VPValue *Op0;
    if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
                      m_VPValue(Op0))) ||
        match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
                      m_VPValue(Op0)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (Plan.hasScalarVFOnly()) {
        auto *I = cast<VPInstruction>(&R);
        // Extracting from the end with VF = 1 implies retrieving the last or
        // penultimate scalar part (UF-1 or UF-2).
        unsigned Offset =
            I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
        I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
        R.eraseFromParent();
      } else {
        // Otherwise we extract from the last part.
        remapOperands(&R, UF - 1);
      }
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}

void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    // Remove recipes that are redundant after unrolling.
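    // A CanonicalIVIncrementForPart that never received a part operand
    // corresponds to part 0 and simplifies to its start operand.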
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1) {
    return;
  }

  UnrollState Unroller(Plan, UF, Ctx);

  // Iterate over all blocks in the plan starting from Entry, and unroll
  // recipes inside them. This includes the vector preheader and middle blocks,
  // which may set up or post-process per-part values.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update backedge values. The header
  // phis cloned during unrolling are just after the header phi for part 0.
  // Reset Part to 1 when reaching the first (part 0) recipe of a block.
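  // E.g., with UF = 2 the cloned copy of a header phi (part 1) has its
  // backedge operand remapped from the part-0 backedge value to its part-1
  // counterpart.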
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the spliced
    // value across the backedge, needs to remap to the last part of the
    // spliced value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue()) ||
        isa<VPWidenPointerInductionRecipe>(&H)) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}

/// Create a single-scalar clone of \p RepR for lane \p Lane.
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
                                       Type *IdxTy, VPReplicateRecipe *RepR,
                                       VPLane Lane) {
  // Collect the operands at Lane, creating extracts as needed.
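  // E.g., for lane 2 a widened operand %w is narrowed to
  //   %w.lane2 = extractelement %w, i32 2
  // unless it is already a single scalar or a BuildVector whose lane-2 input
  // can be used directly (names illustrative).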
  SmallVector<VPValue *> NewOps;
  for (VPValue *Op : RepR->operands()) {
    if (vputils::isSingleScalar(Op)) {
      NewOps.push_back(Op);
      continue;
    }
    if (Lane.getKind() == VPLane::Kind::ScalableLast) {
      NewOps.push_back(
          Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
      continue;
    }
    // Look through buildvector to avoid unnecessary extracts.
    if (match(Op, m_BuildVector())) {
      NewOps.push_back(
          cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
      continue;
    }
    VPValue *Idx =
        Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
    VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
    NewOps.push_back(Ext);
  }

  auto *New =
      new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                            /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
  New->insertBefore(RepR);
  return New;
}

void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);

  // Visit all VPBBs outside the loop region and directly inside the top-level
  // loop region.
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
  auto VPBBsToUnroll =
      concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
  for (VPBasicBlock *VPBB : VPBBsToUnroll) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->isSingleScalar())
        continue;

      VPBuilder Builder(RepR);
      if (RepR->getNumUsers() == 0) {
        if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
            vputils::isSingleScalar(RepR->getOperand(1))) {
          // Stores to invariant addresses need to store the last lane only.
          cloneForLane(Plan, Builder, IdxTy, RepR,
                       VPLane::getLastLaneForVF(VF));
        } else {
          // Create single-scalar version of RepR for all lanes.
          for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
        }
        RepR->eraseFromParent();
        continue;
      }
      // Create single-scalar version of RepR for all lanes.
      SmallVector<VPValue *> LaneDefs;
      for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
        LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));

      // Users that only demand the first lane can use the definition for lane
      // 0.
      RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
        return U.onlyFirstLaneUsed(RepR);
      });

      // If needed, create a Build(Struct)Vector recipe to insert the scalar
      // lane values into a vector.
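      // E.g., with VF = 4 the four lane definitions %r.0 .. %r.3 are combined
      // as %vec = BuildVector %r.0, %r.1, %r.2, %r.3, which then replaces the
      // remaining (vector) users of RepR (names illustrative).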
      Type *ResTy = RepR->getUnderlyingInstr()->getType();
      VPValue *VecRes = Builder.createNaryOp(
          ResTy->isStructTy() ? VPInstruction::BuildStructVector
                              : VPInstruction::BuildVector,
          LaneDefs);
      RepR->replaceAllUsesWith(VecRes);
      RepR->eraseFromParent();
    }
  }
}