1 | //===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements transforms for initial VPlan construction. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "LoopVectorizationPlanner.h" |
15 | #include "VPlan.h" |
16 | #include "VPlanCFG.h" |
17 | #include "VPlanDominatorTree.h" |
18 | #include "VPlanPatternMatch.h" |
19 | #include "VPlanTransforms.h" |
20 | #include "llvm/Analysis/LoopInfo.h" |
21 | #include "llvm/Analysis/LoopIterator.h" |
22 | #include "llvm/Analysis/ScalarEvolution.h" |
23 | |
24 | #define DEBUG_TYPE "vplan" |
25 | |
26 | using namespace llvm; |
27 | |
28 | namespace { |
29 | // Class that is used to build the plain CFG for the incoming IR. |
30 | class PlainCFGBuilder { |
31 | // The outermost loop of the input loop nest considered for vectorization. |
32 | Loop *TheLoop; |
33 | |
34 | // Loop Info analysis. |
35 | LoopInfo *LI; |
36 | |
37 | // Vectorization plan that we are working on. |
38 | std::unique_ptr<VPlan> Plan; |
39 | |
40 | // Builder of the VPlan instruction-level representation. |
41 | VPBuilder VPIRBuilder; |
42 | |
43 | // NOTE: The following maps are intentionally destroyed after the plain CFG |
44 | // construction because subsequent VPlan-to-VPlan transformation may |
45 | // invalidate them. |
46 | // Map incoming BasicBlocks to their newly-created VPBasicBlocks. |
47 | DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB; |
48 | // Map incoming Value definitions to their newly-created VPValues. |
49 | DenseMap<Value *, VPValue *> IRDef2VPValue; |
50 | |
51 | // Hold phi node's that need to be fixed once the plain CFG has been built. |
52 | SmallVector<PHINode *, 8> PhisToFix; |
53 | |
54 | // Utility functions. |
55 | void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); |
56 | void fixHeaderPhis(); |
57 | VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); |
58 | #ifndef NDEBUG |
59 | bool isExternalDef(Value *Val); |
60 | #endif |
61 | VPValue *getOrCreateVPOperand(Value *IRVal); |
62 | void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB); |
63 | |
64 | public: |
65 | PlainCFGBuilder(Loop *Lp, LoopInfo *LI) |
66 | : TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(args&: Lp)) {} |
67 | |
68 | /// Build plain CFG for TheLoop and connect it to Plan's entry. |
69 | std::unique_ptr<VPlan> buildPlainCFG(); |
70 | }; |
71 | } // anonymous namespace |
72 | |
73 | // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB |
74 | // must have no predecessors. |
75 | void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { |
76 | // Collect VPBB predecessors. |
77 | SmallVector<VPBlockBase *, 2> VPBBPreds; |
78 | for (BasicBlock *Pred : predecessors(BB)) |
79 | VPBBPreds.push_back(Elt: getOrCreateVPBB(BB: Pred)); |
80 | VPBB->setPredecessors(VPBBPreds); |
81 | } |
82 | |
83 | static bool (BasicBlock *BB, Loop *L) { |
84 | return L && BB == L->getHeader(); |
85 | } |
86 | |
87 | // Add operands to VPInstructions representing phi nodes from the input IR. |
88 | void PlainCFGBuilder::() { |
89 | for (auto *Phi : PhisToFix) { |
90 | assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode." ); |
91 | VPValue *VPVal = IRDef2VPValue[Phi]; |
92 | assert(isa<VPWidenPHIRecipe>(VPVal) && |
93 | "Expected WidenPHIRecipe for phi node." ); |
94 | auto *VPPhi = cast<VPWidenPHIRecipe>(Val: VPVal); |
95 | assert(VPPhi->getNumOperands() == 0 && |
96 | "Expected VPInstruction with no operands." ); |
97 | assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) && |
98 | "Expected Phi in header block." ); |
99 | assert(Phi->getNumOperands() == 2 && |
100 | "header phi must have exactly 2 operands" ); |
101 | for (BasicBlock *Pred : predecessors(BB: Phi->getParent())) |
102 | VPPhi->addOperand( |
103 | Operand: getOrCreateVPOperand(IRVal: Phi->getIncomingValueForBlock(BB: Pred))); |
104 | } |
105 | } |
106 | |
107 | // Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an |
108 | // existing one if it was already created. |
109 | VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { |
110 | if (auto *VPBB = BB2VPBB.lookup(Val: BB)) { |
111 | // Retrieve existing VPBB. |
112 | return VPBB; |
113 | } |
114 | |
115 | // Create new VPBB. |
116 | StringRef Name = BB->getName(); |
117 | LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n" ); |
118 | VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name); |
119 | BB2VPBB[BB] = VPBB; |
120 | return VPBB; |
121 | } |
122 | |
123 | #ifndef NDEBUG |
124 | // Return true if \p Val is considered an external definition. An external |
125 | // definition is either: |
126 | // 1. A Value that is not an Instruction. This will be refined in the future. |
127 | // 2. An Instruction that is outside of the IR region represented in VPlan, |
128 | // i.e., is not part of the loop nest. |
129 | bool PlainCFGBuilder::isExternalDef(Value *Val) { |
130 | // All the Values that are not Instructions are considered external |
131 | // definitions for now. |
132 | Instruction *Inst = dyn_cast<Instruction>(Val); |
133 | if (!Inst) |
134 | return true; |
135 | |
136 | // Check whether Instruction definition is in loop body. |
137 | return !TheLoop->contains(Inst); |
138 | } |
139 | #endif |
140 | |
141 | // Create a new VPValue or retrieve an existing one for the Instruction's |
142 | // operand \p IRVal. This function must only be used to create/retrieve VPValues |
143 | // for *Instruction's operands* and not to create regular VPInstruction's. For |
144 | // the latter, please, look at 'createVPInstructionsForVPBB'. |
145 | VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { |
146 | auto VPValIt = IRDef2VPValue.find(Val: IRVal); |
147 | if (VPValIt != IRDef2VPValue.end()) |
148 | // Operand has an associated VPInstruction or VPValue that was previously |
149 | // created. |
150 | return VPValIt->second; |
151 | |
152 | // Operand doesn't have a previously created VPInstruction/VPValue. This |
153 | // means that operand is: |
154 | // A) a definition external to VPlan, |
155 | // B) any other Value without specific representation in VPlan. |
156 | // For now, we use VPValue to represent A and B and classify both as external |
157 | // definitions. We may introduce specific VPValue subclasses for them in the |
158 | // future. |
159 | assert(isExternalDef(IRVal) && "Expected external definition as operand." ); |
160 | |
161 | // A and B: Create VPValue and add it to the pool of external definitions and |
162 | // to the Value->VPValue map. |
163 | VPValue *NewVPVal = Plan->getOrAddLiveIn(V: IRVal); |
164 | IRDef2VPValue[IRVal] = NewVPVal; |
165 | return NewVPVal; |
166 | } |
167 | |
168 | // Create new VPInstructions in a VPBasicBlock, given its BasicBlock |
169 | // counterpart. This function must be invoked in RPO so that the operands of a |
170 | // VPInstruction in \p BB have been visited before (except for Phi nodes). |
171 | void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, |
172 | BasicBlock *BB) { |
173 | VPIRBuilder.setInsertPoint(VPBB); |
174 | // TODO: Model and preserve debug intrinsics in VPlan. |
175 | for (Instruction &InstRef : BB->instructionsWithoutDebug(SkipPseudoOp: false)) { |
176 | Instruction *Inst = &InstRef; |
177 | |
178 | // There shouldn't be any VPValue for Inst at this point. Otherwise, we |
179 | // visited Inst when we shouldn't, breaking the RPO traversal order. |
180 | assert(!IRDef2VPValue.count(Inst) && |
181 | "Instruction shouldn't have been visited." ); |
182 | |
183 | if (auto *Br = dyn_cast<BranchInst>(Val: Inst)) { |
184 | // Conditional branch instruction are represented using BranchOnCond |
185 | // recipes. |
186 | if (Br->isConditional()) { |
187 | VPValue *Cond = getOrCreateVPOperand(IRVal: Br->getCondition()); |
188 | VPIRBuilder.createNaryOp(Opcode: VPInstruction::BranchOnCond, Operands: {Cond}, Inst); |
189 | } |
190 | |
191 | // Skip the rest of the Instruction processing for Branch instructions. |
192 | continue; |
193 | } |
194 | |
195 | if (auto *SI = dyn_cast<SwitchInst>(Val: Inst)) { |
196 | SmallVector<VPValue *> Ops = {getOrCreateVPOperand(IRVal: SI->getCondition())}; |
197 | for (auto Case : SI->cases()) |
198 | Ops.push_back(Elt: getOrCreateVPOperand(IRVal: Case.getCaseValue())); |
199 | VPIRBuilder.createNaryOp(Opcode: Instruction::Switch, Operands: Ops, Inst); |
200 | continue; |
201 | } |
202 | |
203 | VPSingleDefRecipe *NewR; |
204 | if (auto *Phi = dyn_cast<PHINode>(Val: Inst)) { |
205 | // Phi node's operands may have not been visited at this point. We create |
206 | // an empty VPInstruction that we will fix once the whole plain CFG has |
207 | // been built. |
208 | NewR = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc(), "vec.phi" ); |
209 | VPBB->appendRecipe(Recipe: NewR); |
210 | if (isHeaderBB(BB: Phi->getParent(), L: LI->getLoopFor(BB: Phi->getParent()))) { |
211 | // Header phis need to be fixed after the VPBB for the latch has been |
212 | // created. |
213 | PhisToFix.push_back(Elt: Phi); |
214 | } else { |
215 | // Add operands for VPPhi in the order matching its predecessors in |
216 | // VPlan. |
217 | DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue; |
218 | for (unsigned I = 0; I != Phi->getNumOperands(); ++I) { |
219 | VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(i: I)]] = |
220 | getOrCreateVPOperand(IRVal: Phi->getIncomingValue(i: I)); |
221 | } |
222 | for (VPBlockBase *Pred : VPBB->getPredecessors()) |
223 | NewR->addOperand( |
224 | Operand: VPPredToIncomingValue.lookup(Val: Pred->getExitingBasicBlock())); |
225 | } |
226 | } else { |
227 | // Translate LLVM-IR operands into VPValue operands and set them in the |
228 | // new VPInstruction. |
229 | SmallVector<VPValue *, 4> VPOperands; |
230 | for (Value *Op : Inst->operands()) |
231 | VPOperands.push_back(Elt: getOrCreateVPOperand(IRVal: Op)); |
232 | |
233 | // Build VPInstruction for any arbitrary Instruction without specific |
234 | // representation in VPlan. |
235 | NewR = cast<VPInstruction>( |
236 | Val: VPIRBuilder.createNaryOp(Opcode: Inst->getOpcode(), Operands: VPOperands, Inst)); |
237 | } |
238 | |
239 | IRDef2VPValue[Inst] = NewR; |
240 | } |
241 | } |
242 | |
243 | // Main interface to build the plain CFG. |
244 | std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() { |
245 | VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Val: Plan->getEntry()); |
246 | BB2VPBB[Entry->getIRBasicBlock()] = Entry; |
247 | for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks()) |
248 | BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB; |
249 | |
250 | // 1. Scan the body of the loop in a topological order to visit each basic |
251 | // block after having visited its predecessor basic blocks. Create a VPBB for |
252 | // each BB and link it to its successor and predecessor VPBBs. Note that |
253 | // predecessors must be set in the same order as they are in the incomming IR. |
254 | // Otherwise, there might be problems with existing phi nodes and algorithm |
255 | // based on predecessors traversal. |
256 | |
257 | // Loop PH needs to be explicitly visited since it's not taken into account by |
258 | // LoopBlocksDFS. |
259 | BasicBlock * = TheLoop->getLoopPreheader(); |
260 | assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && |
261 | "Unexpected loop preheader" ); |
262 | for (auto &I : *ThePreheaderBB) { |
263 | if (I.getType()->isVoidTy()) |
264 | continue; |
265 | IRDef2VPValue[&I] = Plan->getOrAddLiveIn(V: &I); |
266 | } |
267 | |
268 | LoopBlocksRPO RPO(TheLoop); |
269 | RPO.perform(LI); |
270 | |
271 | for (BasicBlock *BB : RPO) { |
272 | // Create or retrieve the VPBasicBlock for this BB. |
273 | VPBasicBlock *VPBB = getOrCreateVPBB(BB); |
274 | // Set VPBB predecessors in the same order as they are in the incoming BB. |
275 | setVPBBPredsFromBB(VPBB, BB); |
276 | |
277 | // Create VPInstructions for BB. |
278 | createVPInstructionsForVPBB(VPBB, BB); |
279 | |
280 | // Set VPBB successors. We create empty VPBBs for successors if they don't |
281 | // exist already. Recipes will be created when the successor is visited |
282 | // during the RPO traversal. |
283 | if (auto *SI = dyn_cast<SwitchInst>(Val: BB->getTerminator())) { |
284 | SmallVector<VPBlockBase *> Succs = { |
285 | getOrCreateVPBB(BB: SI->getDefaultDest())}; |
286 | for (auto Case : SI->cases()) |
287 | Succs.push_back(Elt: getOrCreateVPBB(BB: Case.getCaseSuccessor())); |
288 | VPBB->setSuccessors(Succs); |
289 | continue; |
290 | } |
291 | auto *BI = cast<BranchInst>(Val: BB->getTerminator()); |
292 | unsigned NumSuccs = succ_size(BB); |
293 | if (NumSuccs == 1) { |
294 | VPBB->setOneSuccessor(getOrCreateVPBB(BB: BB->getSingleSuccessor())); |
295 | continue; |
296 | } |
297 | assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() && |
298 | "block must have conditional branch with 2 successors" ); |
299 | |
300 | BasicBlock *IRSucc0 = BI->getSuccessor(i: 0); |
301 | BasicBlock *IRSucc1 = BI->getSuccessor(i: 1); |
302 | VPBasicBlock *Successor0 = getOrCreateVPBB(BB: IRSucc0); |
303 | VPBasicBlock *Successor1 = getOrCreateVPBB(BB: IRSucc1); |
304 | VPBB->setTwoSuccessors(IfTrue: Successor0, IfFalse: Successor1); |
305 | } |
306 | |
307 | for (auto *EB : Plan->getExitBlocks()) |
308 | setVPBBPredsFromBB(VPBB: EB, BB: EB->getIRBasicBlock()); |
309 | |
310 | // 2. The whole CFG has been built at this point so all the input Values must |
311 | // have a VPlan counterpart. Fix VPlan header phi by adding their |
312 | // corresponding VPlan operands. |
313 | fixHeaderPhis(); |
314 | |
315 | Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(BB: TheLoop->getHeader())); |
316 | Plan->getEntry()->setPlan(&*Plan); |
317 | |
318 | // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the |
319 | // VPIRInstructions wrapping them. |
320 | // // Note that the operand order corresponds to IR predecessor order, and may |
321 | // need adjusting when VPlan predecessors are added, if an exit block has |
322 | // multiple predecessor. |
323 | for (auto *EB : Plan->getExitBlocks()) { |
324 | for (VPRecipeBase &R : EB->phis()) { |
325 | auto *PhiR = cast<VPIRPhi>(Val: &R); |
326 | PHINode &Phi = PhiR->getIRPhi(); |
327 | assert(PhiR->getNumOperands() == 0 && |
328 | "no phi operands should be added yet" ); |
329 | for (BasicBlock *Pred : predecessors(BB: EB->getIRBasicBlock())) |
330 | PhiR->addOperand( |
331 | Operand: getOrCreateVPOperand(IRVal: Phi.getIncomingValueForBlock(BB: Pred))); |
332 | } |
333 | } |
334 | |
335 | LLVM_DEBUG(Plan->setName("Plain CFG\n" ); dbgs() << *Plan); |
336 | return std::move(Plan); |
337 | } |
338 | |
339 | std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop, |
340 | LoopInfo &LI) { |
341 | PlainCFGBuilder Builder(TheLoop, &LI); |
342 | return Builder.buildPlainCFG(); |
343 | } |
344 | |
345 | /// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it |
346 | /// has exactly 2 predecessors (preheader and latch), where the block |
347 | /// dominates the latch and the preheader dominates the block. If it is a |
348 | /// header block return true and canonicalize the predecessors of the header |
349 | /// (making sure the preheader appears first and the latch second) and the |
350 | /// successors of the latch (making sure the loop exit comes first). Otherwise |
351 | /// return false. |
352 | static bool canonicalHeaderAndLatch(VPBlockBase *, |
353 | const VPDominatorTree &VPDT) { |
354 | ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors(); |
355 | if (Preds.size() != 2) |
356 | return false; |
357 | |
358 | auto * = Preds[0]; |
359 | auto *LatchVPBB = Preds[1]; |
360 | if (!VPDT.dominates(A: PreheaderVPBB, B: HeaderVPB) || |
361 | !VPDT.dominates(A: HeaderVPB, B: LatchVPBB)) { |
362 | std::swap(a&: PreheaderVPBB, b&: LatchVPBB); |
363 | |
364 | if (!VPDT.dominates(A: PreheaderVPBB, B: HeaderVPB) || |
365 | !VPDT.dominates(A: HeaderVPB, B: LatchVPBB)) |
366 | return false; |
367 | |
368 | // Canonicalize predecessors of header so that preheader is first and |
369 | // latch second. |
370 | HeaderVPB->swapPredecessors(); |
371 | for (VPRecipeBase &R : cast<VPBasicBlock>(Val: HeaderVPB)->phis()) |
372 | R.swapOperands(); |
373 | } |
374 | |
375 | // The two successors of conditional branch match the condition, with the |
376 | // first successor corresponding to true and the second to false. We |
377 | // canonicalize the successors of the latch when introducing the region, such |
378 | // that the latch exits the region when its condition is true; invert the |
379 | // original condition if the original CFG branches to the header on true. |
380 | // Note that the exit edge is not yet connected for top-level loops. |
381 | if (LatchVPBB->getSingleSuccessor() || |
382 | LatchVPBB->getSuccessors()[0] != HeaderVPB) |
383 | return true; |
384 | |
385 | assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors" ); |
386 | auto *Term = cast<VPBasicBlock>(Val: LatchVPBB)->getTerminator(); |
387 | assert(cast<VPInstruction>(Term)->getOpcode() == |
388 | VPInstruction::BranchOnCond && |
389 | "terminator must be a BranchOnCond" ); |
390 | auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(N: 0)}); |
391 | Not->insertBefore(InsertPos: Term); |
392 | Term->setOperand(I: 0, New: Not); |
393 | LatchVPBB->swapSuccessors(); |
394 | |
395 | return true; |
396 | } |
397 | |
398 | /// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. |
399 | static void createLoopRegion(VPlan &Plan, VPBlockBase *) { |
400 | auto * = HeaderVPB->getPredecessors()[0]; |
401 | auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; |
402 | |
403 | VPBlockUtils::disconnectBlocks(From: PreheaderVPBB, To: HeaderVPB); |
404 | VPBlockUtils::disconnectBlocks(From: LatchVPBB, To: HeaderVPB); |
405 | VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor(); |
406 | assert(LatchExitVPB && "Latch expected to be left with a single successor" ); |
407 | |
408 | // Create an empty region first and insert it between PreheaderVPBB and |
409 | // LatchExitVPB, taking care to preserve the original predecessor & successor |
410 | // order of blocks. Set region entry and exiting after both HeaderVPB and |
411 | // LatchVPBB have been disconnected from their predecessors/successors. |
412 | auto *R = Plan.createVPRegionBlock(Name: "" , IsReplicator: false /*isReplicator*/); |
413 | VPBlockUtils::insertOnEdge(From: LatchVPBB, To: LatchExitVPB, BlockPtr: R); |
414 | VPBlockUtils::disconnectBlocks(From: LatchVPBB, To: R); |
415 | VPBlockUtils::connectBlocks(From: PreheaderVPBB, To: R); |
416 | R->setEntry(HeaderVPB); |
417 | R->setExiting(LatchVPBB); |
418 | |
419 | // All VPBB's reachable shallowly from HeaderVPB belong to the current region. |
420 | for (VPBlockBase *VPBB : vp_depth_first_shallow(G: HeaderVPB)) |
421 | VPBB->setParent(R); |
422 | } |
423 | |
424 | // Add the necessary canonical IV and branch recipes required to control the |
425 | // loop. |
426 | static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *, |
427 | VPBasicBlock *LatchVPBB, Type *IdxTy, |
428 | DebugLoc DL) { |
429 | using namespace VPlanPatternMatch; |
430 | Value *StartIdx = ConstantInt::get(Ty: IdxTy, V: 0); |
431 | auto *StartV = Plan.getOrAddLiveIn(V: StartIdx); |
432 | |
433 | // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. |
434 | auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); |
435 | HeaderVPBB->insert(Recipe: CanonicalIVPHI, InsertPt: HeaderVPBB->begin()); |
436 | |
437 | // We are about to replace the branch to exit the region. Remove the original |
438 | // BranchOnCond, if there is any. |
439 | if (!LatchVPBB->empty() && |
440 | match(V: &LatchVPBB->back(), P: m_BranchOnCond(Op0: m_VPValue()))) |
441 | LatchVPBB->getTerminator()->eraseFromParent(); |
442 | |
443 | VPBuilder Builder(LatchVPBB); |
444 | // Add a VPInstruction to increment the scalar canonical IV by VF * UF. |
445 | // Initially the induction increment is guaranteed to not wrap, but that may |
446 | // change later, e.g. when tail-folding, when the flags need to be dropped. |
447 | auto *CanonicalIVIncrement = Builder.createOverflowingOp( |
448 | Opcode: Instruction::Add, Operands: {CanonicalIVPHI, &Plan.getVFxUF()}, WrapFlags: {true, false}, DL, |
449 | Name: "index.next" ); |
450 | CanonicalIVPHI->addOperand(Operand: CanonicalIVIncrement); |
451 | |
452 | // Add the BranchOnCount VPInstruction to the latch. |
453 | Builder.createNaryOp(Opcode: VPInstruction::BranchOnCount, |
454 | Operands: {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); |
455 | } |
456 | |
457 | void VPlanTransforms::prepareForVectorization( |
458 | VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, |
459 | bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop, |
460 | DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) { |
461 | VPDominatorTree VPDT; |
462 | VPDT.recalculate(Func&: Plan); |
463 | |
464 | VPBlockBase * = Plan.getEntry()->getSingleSuccessor(); |
465 | canonicalHeaderAndLatch(HeaderVPB, VPDT); |
466 | VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1]; |
467 | |
468 | VPBasicBlock * = Plan.createVPBasicBlock(Name: "vector.ph" ); |
469 | VPBlockUtils::insertBlockAfter(NewBlock: VecPreheader, BlockPtr: Plan.getEntry()); |
470 | |
471 | VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock(Name: "middle.block" ); |
472 | // The canonical LatchVPB has the header block as last successor. If it has |
473 | // another successor, this successor is an exit block - insert middle block on |
474 | // its edge. Otherwise, add middle block as another successor retaining header |
475 | // as last. |
476 | if (LatchVPB->getNumSuccessors() == 2) { |
477 | VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0]; |
478 | VPBlockUtils::insertOnEdge(From: LatchVPB, To: LatchExitVPB, BlockPtr: MiddleVPBB); |
479 | } else { |
480 | VPBlockUtils::connectBlocks(From: LatchVPB, To: MiddleVPBB); |
481 | LatchVPB->swapSuccessors(); |
482 | } |
483 | |
484 | addCanonicalIVRecipes(Plan, HeaderVPBB: cast<VPBasicBlock>(Val: HeaderVPB), |
485 | LatchVPBB: cast<VPBasicBlock>(Val: LatchVPB), IdxTy: InductionTy, DL: IVDL); |
486 | |
487 | [[maybe_unused]] bool HandledUncountableEarlyExit = false; |
488 | // Disconnect all early exits from the loop leaving it with a single exit from |
489 | // the latch. Early exits that are countable are left for a scalar epilog. The |
490 | // condition of uncountable early exits (currently at most one is supported) |
491 | // is fused into the latch exit, and used to branch from middle block to the |
492 | // early exit destination. |
493 | for (VPIRBasicBlock *EB : Plan.getExitBlocks()) { |
494 | for (VPBlockBase *Pred : to_vector(Range&: EB->getPredecessors())) { |
495 | if (Pred == MiddleVPBB) |
496 | continue; |
497 | if (HasUncountableEarlyExit) { |
498 | assert(!HandledUncountableEarlyExit && |
499 | "can handle exactly one uncountable early exit" ); |
500 | handleUncountableEarlyExit(EarlyExitingVPBB: cast<VPBasicBlock>(Val: Pred), EarlyExitVPBB: EB, Plan, |
501 | HeaderVPBB: cast<VPBasicBlock>(Val: HeaderVPB), |
502 | LatchVPBB: cast<VPBasicBlock>(Val: LatchVPB), Range); |
503 | HandledUncountableEarlyExit = true; |
504 | } else { |
505 | for (VPRecipeBase &R : EB->phis()) |
506 | cast<VPIRPhi>(Val: &R)->removeIncomingValueFor(IncomingBlock: Pred); |
507 | } |
508 | cast<VPBasicBlock>(Val: Pred)->getTerminator()->eraseFromParent(); |
509 | VPBlockUtils::disconnectBlocks(From: Pred, To: EB); |
510 | } |
511 | } |
512 | |
513 | assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) && |
514 | "missed an uncountable exit that must be handled" ); |
515 | |
516 | // Create SCEV and VPValue for the trip count. |
517 | // We use the symbolic max backedge-taken-count, which works also when |
518 | // vectorizing loops with uncountable early exits. |
519 | const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); |
520 | assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && |
521 | "Invalid loop count" ); |
522 | ScalarEvolution &SE = *PSE.getSE(); |
523 | const SCEV *TripCount = SE.getTripCountFromExitCount(ExitCount: BackedgeTakenCountSCEV, |
524 | EvalTy: InductionTy, L: TheLoop); |
525 | Plan.setTripCount( |
526 | vputils::getOrCreateVPValueForSCEVExpr(Plan, Expr: TripCount, SE)); |
527 | |
528 | VPBasicBlock *ScalarPH = Plan.createVPBasicBlock(Name: "scalar.ph" ); |
529 | VPBlockUtils::connectBlocks(From: ScalarPH, To: Plan.getScalarHeader()); |
530 | |
531 | // The connection order corresponds to the operands of the conditional branch, |
532 | // with the middle block already connected to the exit block. |
533 | VPBlockUtils::connectBlocks(From: MiddleVPBB, To: ScalarPH); |
534 | // Also connect the entry block to the scalar preheader. |
535 | // TODO: Also introduce a branch recipe together with the minimum trip count |
536 | // check. |
537 | VPBlockUtils::connectBlocks(From: Plan.getEntry(), To: ScalarPH); |
538 | Plan.getEntry()->swapSuccessors(); |
539 | |
540 | // If MiddleVPBB has a single successor then the original loop does not exit |
541 | // via the latch and the single successor must be the scalar preheader. |
542 | // There's no need to add a runtime check to MiddleVPBB. |
543 | if (MiddleVPBB->getNumSuccessors() == 1) { |
544 | assert(MiddleVPBB->getSingleSuccessor() == ScalarPH && |
545 | "must have ScalarPH as single successor" ); |
546 | return; |
547 | } |
548 | |
549 | assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors" ); |
550 | |
551 | // Add a check in the middle block to see if we have completed all of the |
552 | // iterations in the first vector loop. |
553 | // |
554 | // Three cases: |
555 | // 1) If we require a scalar epilogue, the scalar ph must execute. Set the |
556 | // condition to false. |
557 | // 2) If (N - N%VF) == N, then we *don't* need to run the |
558 | // remainder. Thus if tail is to be folded, we know we don't need to run |
559 | // the remainder and we can set the condition to true. |
560 | // 3) Otherwise, construct a runtime check. |
561 | |
562 | // We use the same DebugLoc as the scalar loop latch terminator instead of |
563 | // the corresponding compare because they may have ended up with different |
564 | // line numbers and we want to avoid awkward line stepping while debugging. |
565 | // E.g., if the compare has got a line number inside the loop. |
566 | DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc(); |
567 | VPBuilder Builder(MiddleVPBB); |
568 | VPValue *Cmp; |
569 | if (!RequiresScalarEpilogueCheck) |
570 | Cmp = Plan.getOrAddLiveIn(V: ConstantInt::getFalse( |
571 | Ty: IntegerType::getInt1Ty(C&: TripCount->getType()->getContext()))); |
572 | else if (TailFolded) |
573 | Cmp = Plan.getOrAddLiveIn(V: ConstantInt::getTrue( |
574 | Ty: IntegerType::getInt1Ty(C&: TripCount->getType()->getContext()))); |
575 | else |
576 | Cmp = Builder.createICmp(Pred: CmpInst::ICMP_EQ, A: Plan.getTripCount(), |
577 | B: &Plan.getVectorTripCount(), DL: LatchDL, Name: "cmp.n" ); |
578 | Builder.createNaryOp(Opcode: VPInstruction::BranchOnCond, Operands: {Cmp}, DL: LatchDL); |
579 | } |
580 | |
581 | void VPlanTransforms::createLoopRegions(VPlan &Plan) { |
582 | VPDominatorTree VPDT; |
583 | VPDT.recalculate(Func&: Plan); |
584 | for (VPBlockBase * : vp_post_order_shallow(G: Plan.getEntry())) |
585 | if (canonicalHeaderAndLatch(HeaderVPB, VPDT)) |
586 | createLoopRegion(Plan, HeaderVPB); |
587 | |
588 | VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); |
589 | TopRegion->setName("vector loop" ); |
590 | TopRegion->getEntryBasicBlock()->setName("vector.body" ); |
591 | } |
592 | |