1 | //===- LoopNestAnalysis.cpp - Loop Nest Analysis --------------------------==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// The implementation for the loop nest analysis. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/LoopNestAnalysis.h" |
15 | #include "llvm/ADT/BreadthFirstIterator.h" |
16 | #include "llvm/ADT/DepthFirstIterator.h" |
17 | #include "llvm/Analysis/ValueTracking.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | #define DEBUG_TYPE "loopnest" |
22 | #ifndef NDEBUG |
23 | static const char *VerboseDebug = DEBUG_TYPE "-verbose" ; |
24 | #endif |
25 | |
26 | /// Determine whether the loops structure violates basic requirements for |
27 | /// perfect nesting: |
28 | /// - the inner loop should be the outer loop's only child |
29 | /// - the outer loop header should 'flow' into the inner loop preheader |
30 | /// or jump around the inner loop to the outer loop latch |
31 | /// - if the inner loop latch exits the inner loop, it should 'flow' into |
32 | /// the outer loop latch. |
33 | /// Returns true if the loop structure satisfies the basic requirements and |
34 | /// false otherwise. |
35 | static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, |
36 | ScalarEvolution &SE); |
37 | |
38 | //===----------------------------------------------------------------------===// |
39 | // LoopNest implementation |
40 | // |
41 | |
42 | LoopNest::LoopNest(Loop &Root, ScalarEvolution &SE) |
43 | : MaxPerfectDepth(getMaxPerfectDepth(Root, SE)) { |
44 | append_range(C&: Loops, R: breadth_first(G: &Root)); |
45 | } |
46 | |
47 | std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root, |
48 | ScalarEvolution &SE) { |
49 | return std::make_unique<LoopNest>(args&: Root, args&: SE); |
50 | } |
51 | |
52 | static CmpInst *getOuterLoopLatchCmp(const Loop &OuterLoop) { |
53 | |
54 | const BasicBlock *Latch = OuterLoop.getLoopLatch(); |
55 | assert(Latch && "Expecting a valid loop latch" ); |
56 | |
57 | const BranchInst *BI = dyn_cast<BranchInst>(Val: Latch->getTerminator()); |
58 | assert(BI && BI->isConditional() && |
59 | "Expecting loop latch terminator to be a branch instruction" ); |
60 | |
61 | CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(Val: BI->getCondition()); |
62 | DEBUG_WITH_TYPE( |
63 | VerboseDebug, if (OuterLoopLatchCmp) { |
64 | dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp |
65 | << "\n" ; |
66 | }); |
67 | return OuterLoopLatchCmp; |
68 | } |
69 | |
70 | static CmpInst *getInnerLoopGuardCmp(const Loop &InnerLoop) { |
71 | |
72 | BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch(); |
73 | CmpInst *InnerLoopGuardCmp = |
74 | (InnerGuard) ? dyn_cast<CmpInst>(Val: InnerGuard->getCondition()) : nullptr; |
75 | |
76 | DEBUG_WITH_TYPE( |
77 | VerboseDebug, if (InnerLoopGuardCmp) { |
78 | dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp |
79 | << "\n" ; |
80 | }); |
81 | return InnerLoopGuardCmp; |
82 | } |
83 | |
84 | static bool checkSafeInstruction(const Instruction &I, |
85 | const CmpInst *InnerLoopGuardCmp, |
86 | const CmpInst *OuterLoopLatchCmp, |
87 | std::optional<Loop::LoopBounds> OuterLoopLB) { |
88 | |
89 | bool IsAllowed = |
90 | isSafeToSpeculativelyExecute(I: &I) || isa<PHINode>(Val: I) || isa<BranchInst>(Val: I); |
91 | if (!IsAllowed) |
92 | return false; |
93 | // The only binary instruction allowed is the outer loop step instruction, |
94 | // the only comparison instructions allowed are the inner loop guard |
95 | // compare instruction and the outer loop latch compare instruction. |
96 | if ((isa<BinaryOperator>(Val: I) && &I != &OuterLoopLB->getStepInst()) || |
97 | (isa<CmpInst>(Val: I) && &I != OuterLoopLatchCmp && &I != InnerLoopGuardCmp)) { |
98 | return false; |
99 | } |
100 | return true; |
101 | } |
102 | |
103 | bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, |
104 | ScalarEvolution &SE) { |
105 | return (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE) == |
106 | PerfectLoopNest); |
107 | } |
108 | |
109 | LoopNest::LoopNestEnum LoopNest::analyzeLoopNestForPerfectNest( |
110 | const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { |
111 | |
112 | assert(!OuterLoop.isInnermost() && "Outer loop should have subloops" ); |
113 | assert(!InnerLoop.isOutermost() && "Inner loop should have a parent" ); |
114 | LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName() |
115 | << "' and '" << InnerLoop.getName() |
116 | << "' are perfectly nested.\n" ); |
117 | |
118 | // Determine whether the loops structure satisfies the following requirements: |
119 | // - the inner loop should be the outer loop's only child |
120 | // - the outer loop header should 'flow' into the inner loop preheader |
121 | // or jump around the inner loop to the outer loop latch |
122 | // - if the inner loop latch exits the inner loop, it should 'flow' into |
123 | // the outer loop latch. |
124 | if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) { |
125 | LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n" ); |
126 | return InvalidLoopStructure; |
127 | } |
128 | |
129 | // Bail out if we cannot retrieve the outer loop bounds. |
130 | auto OuterLoopLB = OuterLoop.getBounds(SE); |
131 | if (OuterLoopLB == std::nullopt) { |
132 | LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " |
133 | << OuterLoop << "\n" ;); |
134 | return OuterLoopLowerBoundUnknown; |
135 | } |
136 | |
137 | CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop); |
138 | CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop); |
139 | |
140 | // Determine whether instructions in a basic block are one of: |
141 | // - the inner loop guard comparison |
142 | // - the outer loop latch comparison |
143 | // - the outer loop induction variable increment |
144 | // - a phi node, a cast or a branch |
145 | auto containsOnlySafeInstructions = [&](const BasicBlock &BB) { |
146 | return llvm::all_of(Range: BB, P: [&](const Instruction &I) { |
147 | bool IsSafeInstr = checkSafeInstruction(I, InnerLoopGuardCmp, |
148 | OuterLoopLatchCmp, OuterLoopLB); |
149 | if (IsSafeInstr) { |
150 | DEBUG_WITH_TYPE(VerboseDebug, { |
151 | dbgs() << "Instruction: " << I << "\nin basic block:" << BB |
152 | << "is unsafe.\n" ; |
153 | }); |
154 | } |
155 | return IsSafeInstr; |
156 | }); |
157 | }; |
158 | |
159 | // Check the code surrounding the inner loop for instructions that are deemed |
160 | // unsafe. |
161 | const BasicBlock * = OuterLoop.getHeader(); |
162 | const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); |
163 | const BasicBlock * = InnerLoop.getLoopPreheader(); |
164 | |
165 | if (!containsOnlySafeInstructions(*OuterLoopHeader) || |
166 | !containsOnlySafeInstructions(*OuterLoopLatch) || |
167 | (InnerLoopPreHeader != OuterLoopHeader && |
168 | !containsOnlySafeInstructions(*InnerLoopPreHeader)) || |
169 | !containsOnlySafeInstructions(*InnerLoop.getExitBlock())) { |
170 | LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is " |
171 | "unsafe\n" ;); |
172 | return ImperfectLoopNest; |
173 | } |
174 | |
175 | LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '" |
176 | << InnerLoop.getName() << "' are perfectly nested.\n" ); |
177 | |
178 | return PerfectLoopNest; |
179 | } |
180 | |
181 | LoopNest::InstrVectorTy LoopNest::getInterveningInstructions( |
182 | const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { |
183 | InstrVectorTy Instr; |
184 | switch (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE)) { |
185 | case PerfectLoopNest: |
186 | LLVM_DEBUG(dbgs() << "The loop Nest is Perfect, returning empty " |
187 | "instruction vector. \n" ;); |
188 | return Instr; |
189 | |
190 | case InvalidLoopStructure: |
191 | LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure. " |
192 | "Instruction vector is empty.\n" ;); |
193 | return Instr; |
194 | |
195 | case OuterLoopLowerBoundUnknown: |
196 | LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " |
197 | << OuterLoop << "\nInstruction vector is empty.\n" ;); |
198 | return Instr; |
199 | |
200 | case ImperfectLoopNest: |
201 | break; |
202 | } |
203 | |
204 | // Identify the outer loop latch comparison instruction. |
205 | auto OuterLoopLB = OuterLoop.getBounds(SE); |
206 | |
207 | CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop); |
208 | CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop); |
209 | |
210 | auto GetUnsafeInstructions = [&](const BasicBlock &BB) { |
211 | for (const Instruction &I : BB) { |
212 | if (!checkSafeInstruction(I, InnerLoopGuardCmp, OuterLoopLatchCmp, |
213 | OuterLoopLB)) { |
214 | Instr.push_back(Elt: &I); |
215 | DEBUG_WITH_TYPE(VerboseDebug, { |
216 | dbgs() << "Instruction: " << I << "\nin basic block:" << BB |
217 | << "is unsafe.\n" ; |
218 | }); |
219 | } |
220 | } |
221 | }; |
222 | |
223 | // Check the code surrounding the inner loop for instructions that are deemed |
224 | // unsafe. |
225 | const BasicBlock * = OuterLoop.getHeader(); |
226 | const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); |
227 | const BasicBlock * = InnerLoop.getLoopPreheader(); |
228 | const BasicBlock *InnerLoopExitBlock = InnerLoop.getExitBlock(); |
229 | |
230 | GetUnsafeInstructions(*OuterLoopHeader); |
231 | GetUnsafeInstructions(*OuterLoopLatch); |
232 | GetUnsafeInstructions(*InnerLoopExitBlock); |
233 | |
234 | if (InnerLoopPreHeader != OuterLoopHeader) { |
235 | GetUnsafeInstructions(*InnerLoopPreHeader); |
236 | } |
237 | return Instr; |
238 | } |
239 | |
240 | SmallVector<LoopVectorTy, 4> |
241 | LoopNest::getPerfectLoops(ScalarEvolution &SE) const { |
242 | SmallVector<LoopVectorTy, 4> LV; |
243 | LoopVectorTy PerfectNest; |
244 | |
245 | for (Loop *L : depth_first(G: const_cast<Loop *>(Loops.front()))) { |
246 | if (PerfectNest.empty()) |
247 | PerfectNest.push_back(Elt: L); |
248 | |
249 | auto &SubLoops = L->getSubLoops(); |
250 | if (SubLoops.size() == 1 && arePerfectlyNested(OuterLoop: *L, InnerLoop: *SubLoops.front(), SE)) { |
251 | PerfectNest.push_back(Elt: SubLoops.front()); |
252 | } else { |
253 | LV.push_back(Elt: PerfectNest); |
254 | PerfectNest.clear(); |
255 | } |
256 | } |
257 | |
258 | return LV; |
259 | } |
260 | |
261 | unsigned LoopNest::getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE) { |
262 | LLVM_DEBUG(dbgs() << "Get maximum perfect depth of loop nest rooted by loop '" |
263 | << Root.getName() << "'\n" ); |
264 | |
265 | const Loop *CurrentLoop = &Root; |
266 | const auto *SubLoops = &CurrentLoop->getSubLoops(); |
267 | unsigned CurrentDepth = 1; |
268 | |
269 | while (SubLoops->size() == 1) { |
270 | const Loop *InnerLoop = SubLoops->front(); |
271 | if (!arePerfectlyNested(OuterLoop: *CurrentLoop, InnerLoop: *InnerLoop, SE)) { |
272 | LLVM_DEBUG({ |
273 | dbgs() << "Not a perfect nest: loop '" << CurrentLoop->getName() |
274 | << "' is not perfectly nested with loop '" |
275 | << InnerLoop->getName() << "'\n" ; |
276 | }); |
277 | break; |
278 | } |
279 | |
280 | CurrentLoop = InnerLoop; |
281 | SubLoops = &CurrentLoop->getSubLoops(); |
282 | ++CurrentDepth; |
283 | } |
284 | |
285 | return CurrentDepth; |
286 | } |
287 | |
288 | const BasicBlock &LoopNest::skipEmptyBlockUntil(const BasicBlock *From, |
289 | const BasicBlock *End, |
290 | bool CheckUniquePred) { |
291 | assert(From && "Expecting valid From" ); |
292 | assert(End && "Expecting valid End" ); |
293 | |
294 | if (From == End || !From->getUniqueSuccessor()) |
295 | return *From; |
296 | |
297 | auto IsEmpty = [](const BasicBlock *BB) { |
298 | return (BB->size() == 1); |
299 | }; |
300 | |
301 | // Visited is used to avoid running into an infinite loop. |
302 | SmallPtrSet<const BasicBlock *, 4> Visited; |
303 | const BasicBlock *BB = From->getUniqueSuccessor(); |
304 | const BasicBlock *PredBB = From; |
305 | while (BB && BB != End && IsEmpty(BB) && !Visited.count(Ptr: BB) && |
306 | (!CheckUniquePred || BB->getUniquePredecessor())) { |
307 | Visited.insert(Ptr: BB); |
308 | PredBB = BB; |
309 | BB = BB->getUniqueSuccessor(); |
310 | } |
311 | |
312 | return (BB == End) ? *End : *PredBB; |
313 | } |
314 | |
315 | static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, |
316 | ScalarEvolution &SE) { |
317 | // The inner loop must be the only outer loop's child. |
318 | if ((OuterLoop.getSubLoops().size() != 1) || |
319 | (InnerLoop.getParentLoop() != &OuterLoop)) |
320 | return false; |
321 | |
322 | // We expect loops in normal form which have a preheader, header, latch... |
323 | if (!OuterLoop.isLoopSimplifyForm() || !InnerLoop.isLoopSimplifyForm()) |
324 | return false; |
325 | |
326 | const BasicBlock * = OuterLoop.getHeader(); |
327 | const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch(); |
328 | const BasicBlock * = InnerLoop.getLoopPreheader(); |
329 | const BasicBlock *InnerLoopLatch = InnerLoop.getLoopLatch(); |
330 | const BasicBlock *InnerLoopExit = InnerLoop.getExitBlock(); |
331 | |
332 | // We expect rotated loops. The inner loop should have a single exit block. |
333 | if (OuterLoop.getExitingBlock() != OuterLoopLatch || |
334 | InnerLoop.getExitingBlock() != InnerLoopLatch || !InnerLoopExit) |
335 | return false; |
336 | |
337 | // Returns whether the block `ExitBlock` contains at least one LCSSA Phi node. |
338 | auto ContainsLCSSAPhi = [](const BasicBlock &ExitBlock) { |
339 | return any_of(Range: ExitBlock.phis(), P: [](const PHINode &PN) { |
340 | return PN.getNumIncomingValues() == 1; |
341 | }); |
342 | }; |
343 | |
344 | // Returns whether the block `BB` qualifies for being an extra Phi block. The |
345 | // extra Phi block is the additional block inserted after the exit block of an |
346 | // "guarded" inner loop which contains "only" Phi nodes corresponding to the |
347 | // LCSSA Phi nodes in the exit block. |
348 | auto = [&](const BasicBlock &BB) { |
349 | return BB.getFirstNonPHI() == BB.getTerminator() && |
350 | all_of(Range: BB.phis(), P: [&](const PHINode &PN) { |
351 | return all_of(Range: PN.blocks(), P: [&](const BasicBlock *IncomingBlock) { |
352 | return IncomingBlock == InnerLoopExit || |
353 | IncomingBlock == OuterLoopHeader; |
354 | }); |
355 | }); |
356 | }; |
357 | |
358 | const BasicBlock * = nullptr; |
359 | // Ensure the only branch that may exist between the loops is the inner loop |
360 | // guard. |
361 | if (OuterLoopHeader != InnerLoopPreHeader) { |
362 | const BasicBlock &SingleSucc = |
363 | LoopNest::skipEmptyBlockUntil(From: OuterLoopHeader, End: InnerLoopPreHeader); |
364 | |
365 | // no conditional branch present |
366 | if (&SingleSucc != InnerLoopPreHeader) { |
367 | const BranchInst *BI = dyn_cast<BranchInst>(Val: SingleSucc.getTerminator()); |
368 | |
369 | if (!BI || BI != InnerLoop.getLoopGuardBranch()) |
370 | return false; |
371 | |
372 | bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); |
373 | |
374 | // The successors of the inner loop guard should be the inner loop |
375 | // preheader or the outer loop latch possibly through empty blocks. |
376 | for (const BasicBlock *Succ : BI->successors()) { |
377 | const BasicBlock * = Succ; |
378 | const BasicBlock *PotentialOuterLatch = Succ; |
379 | |
380 | // Ensure the inner loop guard successor is empty before skipping |
381 | // blocks. |
382 | if (Succ->size() == 1) { |
383 | PotentialInnerPreHeader = |
384 | &LoopNest::skipEmptyBlockUntil(From: Succ, End: InnerLoopPreHeader); |
385 | PotentialOuterLatch = |
386 | &LoopNest::skipEmptyBlockUntil(From: Succ, End: OuterLoopLatch); |
387 | } |
388 | |
389 | if (PotentialInnerPreHeader == InnerLoopPreHeader) |
390 | continue; |
391 | if (PotentialOuterLatch == OuterLoopLatch) |
392 | continue; |
393 | |
394 | // If `InnerLoopExit` contains LCSSA Phi instructions, additional block |
395 | // may be inserted before the `OuterLoopLatch` to which `BI` jumps. The |
396 | // loops are still considered perfectly nested if the extra block only |
397 | // contains Phi instructions from InnerLoopExit and OuterLoopHeader. |
398 | if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && |
399 | Succ->getSingleSuccessor() == OuterLoopLatch) { |
400 | // Points to the extra block so that we can reference it later in the |
401 | // final check. We can also conclude that the inner loop is |
402 | // guarded and there exists LCSSA Phi node in the exit block later if |
403 | // we see a non-null `ExtraPhiBlock`. |
404 | ExtraPhiBlock = Succ; |
405 | continue; |
406 | } |
407 | |
408 | DEBUG_WITH_TYPE(VerboseDebug, { |
409 | dbgs() << "Inner loop guard successor " << Succ->getName() |
410 | << " doesn't lead to inner loop preheader or " |
411 | "outer loop latch.\n" ; |
412 | }); |
413 | return false; |
414 | } |
415 | } |
416 | } |
417 | |
418 | // Ensure the inner loop exit block lead to the outer loop latch possibly |
419 | // through empty blocks. |
420 | if ((!ExtraPhiBlock || |
421 | &LoopNest::skipEmptyBlockUntil(From: InnerLoop.getExitBlock(), |
422 | End: ExtraPhiBlock) != ExtraPhiBlock) && |
423 | (&LoopNest::skipEmptyBlockUntil(From: InnerLoop.getExitBlock(), |
424 | End: OuterLoopLatch) != OuterLoopLatch)) { |
425 | DEBUG_WITH_TYPE( |
426 | VerboseDebug, |
427 | dbgs() << "Inner loop exit block " << *InnerLoopExit |
428 | << " does not directly lead to the outer loop latch.\n" ;); |
429 | return false; |
430 | } |
431 | |
432 | return true; |
433 | } |
434 | |
435 | AnalysisKey LoopNestAnalysis::Key; |
436 | |
437 | raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) { |
438 | OS << "IsPerfect=" ; |
439 | if (LN.getMaxPerfectDepth() == LN.getNestDepth()) |
440 | OS << "true" ; |
441 | else |
442 | OS << "false" ; |
443 | OS << ", Depth=" << LN.getNestDepth(); |
444 | OS << ", OutermostLoop: " << LN.getOutermostLoop().getName(); |
445 | OS << ", Loops: ( " ; |
446 | for (const Loop *L : LN.getLoops()) |
447 | OS << L->getName() << " " ; |
448 | OS << ")" ; |
449 | |
450 | return OS; |
451 | } |
452 | |
453 | //===----------------------------------------------------------------------===// |
454 | // LoopNestPrinterPass implementation |
455 | // |
456 | |
457 | PreservedAnalyses LoopNestPrinterPass::run(Loop &L, LoopAnalysisManager &AM, |
458 | LoopStandardAnalysisResults &AR, |
459 | LPMUpdater &U) { |
460 | if (auto LN = LoopNest::getLoopNest(Root&: L, SE&: AR.SE)) |
461 | OS << *LN << "\n" ; |
462 | |
463 | return PreservedAnalyses::all(); |
464 | } |
465 | |