1//===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// When alias analysis is uncertain about the aliasing between any two accesses,
10// it will return MayAlias. This uncertainty from alias analysis restricts LICM
11// from proceeding further. In cases where alias analysis is uncertain we might
12// use loop versioning as an alternative.
13//
14// Loop Versioning will create a version of the loop with aggressive aliasing
15// assumptions in addition to the original with conservative (default) aliasing
16// assumptions. The version of the loop making aggressive aliasing assumptions
17// will have all the memory accesses marked as no-alias. These two versions of
18// loop will be preceded by a memory runtime check. This runtime check consists
19// of bound checks for all unique memory accessed in loop, and it ensures the
20// lack of memory aliasing. The result of the runtime check determines which of
21// the loop versions is executed: If the runtime check detects any memory
22// aliasing, then the original loop is executed. Otherwise, the version with
23// aggressive aliasing assumptions is used.
24//
25// Following are the top level steps:
26//
27// a) Perform LoopVersioningLICM's feasibility check.
28// b) If loop is a candidate for versioning then create a memory bound check,
29// by considering all the memory accesses in loop body.
30// c) Clone original loop and set all memory accesses as no-alias in new loop.
31// d) Set original loop & versioned loop as a branch target of the runtime check
32// result.
33//
34// It transforms loop as shown below:
35//
36// +----------------+
37// |Runtime Memcheck|
38// +----------------+
39// |
40// +----------+----------------+----------+
41// | |
42// +---------+----------+ +-----------+----------+
43// |Orig Loop Preheader | |Cloned Loop Preheader |
44// +--------------------+ +----------------------+
45// | |
46// +--------------------+ +----------------------+
47// |Orig Loop Body | |Cloned Loop Body |
48// +--------------------+ +----------------------+
49// | |
50// +--------------------+ +----------------------+
51// |Orig Loop Exit Block| |Cloned Loop Exit Block|
52// +--------------------+ +-----------+----------+
53// | |
54// +----------+--------------+-----------+
55// |
56// +-----+----+
57// |Join Block|
58// +----------+
59//
60//===----------------------------------------------------------------------===//
61
62#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63#include "llvm/ADT/SmallVector.h"
64#include "llvm/ADT/StringRef.h"
65#include "llvm/Analysis/AliasAnalysis.h"
66#include "llvm/Analysis/AliasSetTracker.h"
67#include "llvm/Analysis/GlobalsModRef.h"
68#include "llvm/Analysis/LoopAccessAnalysis.h"
69#include "llvm/Analysis/LoopInfo.h"
70#include "llvm/Analysis/LoopPass.h"
71#include "llvm/Analysis/OptimizationRemarkEmitter.h"
72#include "llvm/Analysis/ScalarEvolution.h"
73#include "llvm/IR/Dominators.h"
74#include "llvm/IR/Instruction.h"
75#include "llvm/IR/Instructions.h"
76#include "llvm/IR/LLVMContext.h"
77#include "llvm/IR/MDBuilder.h"
78#include "llvm/IR/Metadata.h"
79#include "llvm/IR/Value.h"
80#include "llvm/Support/Casting.h"
81#include "llvm/Support/CommandLine.h"
82#include "llvm/Support/Debug.h"
83#include "llvm/Support/raw_ostream.h"
84#include "llvm/Transforms/Utils/LoopUtils.h"
85#include "llvm/Transforms/Utils/LoopVersioning.h"
86#include <cassert>
87
88using namespace llvm;
89
90#define DEBUG_TYPE "loop-versioning-licm"
91
92static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
93
94/// Threshold minimum allowed percentage for possible
95/// invariant instructions in a loop.
96static cl::opt<float>
97 LVInvarThreshold("licm-versioning-invariant-threshold",
98 cl::desc("LoopVersioningLICM's minimum allowed percentage "
99 "of possible invariant instructions per loop"),
100 cl::init(Val: 25), cl::Hidden);
101
102/// Threshold for maximum allowed loop nest/depth
103static cl::opt<unsigned> LVLoopDepthThreshold(
104 "licm-versioning-max-depth-threshold",
105 cl::desc(
106 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
107 cl::init(Val: 2), cl::Hidden);
108
109namespace {
110
111struct LoopVersioningLICM {
112 // We don't explicitly pass in LoopAccessInfo to the constructor since the
113 // loop versioning might return early due to instructions that are not safe
114 // for versioning. By passing the proxy instead the construction of
115 // LoopAccessInfo will take place only when it's necessary.
116 LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
117 OptimizationRemarkEmitter *ORE,
118 LoopAccessInfoManager &LAIs, LoopInfo &LI,
119 Loop *CurLoop)
120 : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop),
121 LoopDepthThreshold(LVLoopDepthThreshold),
122 InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
123
124 bool run(DominatorTree *DT);
125
126private:
127 // Current AliasAnalysis information
128 AliasAnalysis *AA;
129
130 // Current ScalarEvolution
131 ScalarEvolution *SE;
132
133 // Current Loop's LoopAccessInfo
134 const LoopAccessInfo *LAI = nullptr;
135
136 // Proxy for retrieving LoopAccessInfo.
137 LoopAccessInfoManager &LAIs;
138
139 LoopInfo &LI;
140
141 // The current loop we are working on.
142 Loop *CurLoop;
143
144 // Maximum loop nest threshold
145 unsigned LoopDepthThreshold;
146
147 // Minimum invariant threshold
148 float InvariantThreshold;
149
150 // Counter to track num of load & store
151 unsigned LoadAndStoreCounter = 0;
152
153 // Counter to track num of invariant
154 unsigned InvariantCounter = 0;
155
156 // Read only loop marker.
157 bool IsReadOnlyLoop = true;
158
159 // OptimizationRemarkEmitter
160 OptimizationRemarkEmitter *ORE;
161
162 bool isLegalForVersioning();
163 bool legalLoopStructure();
164 bool legalLoopInstructions();
165 bool legalLoopMemoryAccesses();
166 bool isLoopAlreadyVisited();
167 bool instructionSafeForVersioning(Instruction *I);
168};
169
170} // end anonymous namespace
171
172/// Check loop structure and confirms it's good for LoopVersioningLICM.
173bool LoopVersioningLICM::legalLoopStructure() {
174 // Loop must be in loop simplify form.
175 if (!CurLoop->isLoopSimplifyForm()) {
176 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n");
177 return false;
178 }
179 // Loop should be innermost loop, if not return false.
180 if (!CurLoop->getSubLoops().empty()) {
181 LLVM_DEBUG(dbgs() << " loop is not innermost\n");
182 return false;
183 }
184 // Loop should have a single backedge, if not return false.
185 if (CurLoop->getNumBackEdges() != 1) {
186 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n");
187 return false;
188 }
189 // Loop must have a single exiting block, if not return false.
190 if (!CurLoop->getExitingBlock()) {
191 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n");
192 return false;
193 }
194 // We only handle bottom-tested loop, i.e. loop in which the condition is
195 // checked at the end of each iteration. With that we can assume that all
196 // instructions in the loop are executed the same number of times.
197 if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
198 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n");
199 return false;
200 }
201 // Parallel loops must not have aliasing loop-invariant memory accesses.
202 // Hence we don't need to version anything in this case.
203 if (CurLoop->isAnnotatedParallel()) {
204 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
205 return false;
206 }
207 // Loop depth more then LoopDepthThreshold are not allowed
208 if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
209 LLVM_DEBUG(dbgs() << " loop depth is more than threshold\n");
210 return false;
211 }
212 // We need to be able to compute the loop trip count in order
213 // to generate the bound checks.
214 const SCEV *ExitCount = SE->getBackedgeTakenCount(L: CurLoop);
215 if (isa<SCEVCouldNotCompute>(Val: ExitCount)) {
216 LLVM_DEBUG(dbgs() << " loop does not have trip count\n");
217 return false;
218 }
219 return true;
220}
221
222/// Check memory accesses in loop and confirms it's good for
223/// LoopVersioningLICM.
224bool LoopVersioningLICM::legalLoopMemoryAccesses() {
225 // Loop over the body of this loop, construct AST.
226 BatchAAResults BAA(*AA);
227 AliasSetTracker AST(BAA);
228 for (auto *Block : CurLoop->getBlocks()) {
229 // Ignore blocks in subloops.
230 if (LI.getLoopFor(BB: Block) == CurLoop)
231 AST.add(BB&: *Block);
232 }
233
234 // Memory check:
235 // Transform phase will generate a versioned loop and also a runtime check to
236 // ensure the pointers are independent and they don’t alias.
237 // In version variant of loop, alias meta data asserts that all access are
238 // mutually independent.
239 //
240 // Pointers aliasing in alias domain are avoided because with multiple
241 // aliasing domains we may not be able to hoist potential loop invariant
242 // access out of the loop.
243 //
244 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
245 // must alias set.
246 bool HasMayAlias = false;
247 bool TypeSafety = false;
248 bool HasMod = false;
249 for (const auto &I : AST) {
250 const AliasSet &AS = I;
251 // Skip Forward Alias Sets, as this should be ignored as part of
252 // the AliasSetTracker object.
253 if (AS.isForwardingAliasSet())
254 continue;
255 // With MustAlias its not worth adding runtime bound check.
256 if (AS.isMustAlias())
257 return false;
258 const Value *SomePtr = AS.begin()->Ptr;
259 bool TypeCheck = true;
260 // Check for Mod & MayAlias
261 HasMayAlias |= AS.isMayAlias();
262 HasMod |= AS.isMod();
263 for (const auto &MemLoc : AS) {
264 const Value *Ptr = MemLoc.Ptr;
265 // Alias tracker should have pointers of same data type.
266 //
267 // FIXME: check no longer effective since opaque pointers?
268 // If the intent is to check that the memory accesses use the
269 // same data type (such that LICM can promote them), then we
270 // can no longer see this from the pointer value types.
271 TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
272 }
273 // At least one alias tracker should have pointers of same data type.
274 TypeSafety |= TypeCheck;
275 }
276 // Ensure types should be of same type.
277 if (!TypeSafety) {
278 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n");
279 return false;
280 }
281 // Ensure loop body shouldn't be read only.
282 if (!HasMod) {
283 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n");
284 return false;
285 }
286 // Make sure alias set has may alias case.
287 // If there no alias memory ambiguity, return false.
288 if (!HasMayAlias) {
289 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n");
290 return false;
291 }
292 return true;
293}
294
295/// Check loop instructions safe for Loop versioning.
296/// It returns true if it's safe else returns false.
297/// Consider following:
298/// 1) Check all load store in loop body are non atomic & non volatile.
299/// 2) Check function call safety, by ensuring its not accessing memory.
300/// 3) Loop body shouldn't have any may throw instruction.
301/// 4) Loop body shouldn't have any convergent or noduplicate instructions.
302bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
303 assert(I != nullptr && "Null instruction found!");
304 // Check function call safety
305 if (auto *Call = dyn_cast<CallBase>(Val: I)) {
306 if (Call->isConvergent() || Call->cannotDuplicate()) {
307 LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
308 return false;
309 }
310
311 if (!AA->doesNotAccessMemory(Call)) {
312 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
313 return false;
314 }
315 }
316
317 // Avoid loops with possiblity of throw
318 if (I->mayThrow()) {
319 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
320 return false;
321 }
322 // If current instruction is load instructions
323 // make sure it's a simple load (non atomic & non volatile)
324 if (I->mayReadFromMemory()) {
325 LoadInst *Ld = dyn_cast<LoadInst>(Val: I);
326 if (!Ld || !Ld->isSimple()) {
327 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n");
328 return false;
329 }
330 LoadAndStoreCounter++;
331 Value *Ptr = Ld->getPointerOperand();
332 // Check loop invariant.
333 if (SE->isLoopInvariant(S: SE->getSCEV(V: Ptr), L: CurLoop))
334 InvariantCounter++;
335 }
336 // If current instruction is store instruction
337 // make sure it's a simple store (non atomic & non volatile)
338 else if (I->mayWriteToMemory()) {
339 StoreInst *St = dyn_cast<StoreInst>(Val: I);
340 if (!St || !St->isSimple()) {
341 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n");
342 return false;
343 }
344 LoadAndStoreCounter++;
345 Value *Ptr = St->getPointerOperand();
346 // Don't allow stores that we don't have runtime checks for, as we won't be
347 // able to mark them noalias meaning they would prevent any code motion.
348 auto &Pointers = LAI->getRuntimePointerChecking()->Pointers;
349 if (!any_of(Range: Pointers, P: [&](auto &P) { return P.PointerValue == Ptr; })) {
350 LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n");
351 return false;
352 }
353 // Check loop invariant.
354 if (SE->isLoopInvariant(S: SE->getSCEV(V: Ptr), L: CurLoop))
355 InvariantCounter++;
356
357 IsReadOnlyLoop = false;
358 }
359 return true;
360}
361
362/// Check loop instructions and confirms it's good for
363/// LoopVersioningLICM.
364bool LoopVersioningLICM::legalLoopInstructions() {
365 // Resetting counters.
366 LoadAndStoreCounter = 0;
367 InvariantCounter = 0;
368 IsReadOnlyLoop = true;
369 using namespace ore;
370 // Get LoopAccessInfo from current loop via the proxy.
371 LAI = &LAIs.getInfo(L&: *CurLoop, /*AllowPartial=*/true);
372 // Check LoopAccessInfo for need of runtime check.
373 if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
374 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
375 return false;
376 }
377 // Iterate over loop blocks and instructions of each block and check
378 // instruction safety.
379 for (auto *Block : CurLoop->getBlocks())
380 for (auto &Inst : *Block) {
381 // If instruction is unsafe just return false.
382 if (!instructionSafeForVersioning(I: &Inst)) {
383 ORE->emit(RemarkBuilder: [&]() {
384 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst)
385 << " Unsafe Loop Instruction";
386 });
387 return false;
388 }
389 }
390 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
391 if (LAI->getNumRuntimePointerChecks() >
392 VectorizerParams::RuntimeMemoryCheckThreshold) {
393 LLVM_DEBUG(
394 dbgs() << " LAA: Runtime checks are more than threshold !!\n");
395 ORE->emit(RemarkBuilder: [&]() {
396 return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck",
397 CurLoop->getStartLoc(),
398 CurLoop->getHeader())
399 << "Number of runtime checks "
400 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
401 << " exceeds threshold "
402 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
403 });
404 return false;
405 }
406 // Loop should have at least one invariant load or store instruction.
407 if (!InvariantCounter) {
408 LLVM_DEBUG(dbgs() << " Invariant not found !!\n");
409 return false;
410 }
411 // Read only loop not allowed.
412 if (IsReadOnlyLoop) {
413 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n");
414 return false;
415 }
416 // Profitablity check:
417 // Check invariant threshold, should be in limit.
418 if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
419 LLVM_DEBUG(
420 dbgs()
421 << " Invariant load & store are less then defined threshold\n");
422 LLVM_DEBUG(dbgs() << " Invariant loads & stores: "
423 << ((InvariantCounter * 100) / LoadAndStoreCounter)
424 << "%\n");
425 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: "
426 << InvariantThreshold << "%\n");
427 ORE->emit(RemarkBuilder: [&]() {
428 return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold",
429 CurLoop->getStartLoc(),
430 CurLoop->getHeader())
431 << "Invariant load & store "
432 << NV("LoadAndStoreCounter",
433 ((InvariantCounter * 100) / LoadAndStoreCounter))
434 << " are less then defined threshold "
435 << NV("Threshold", InvariantThreshold);
436 });
437 return false;
438 }
439 return true;
440}
441
442/// It checks loop is already visited or not.
443/// check loop meta data, if loop revisited return true
444/// else false.
445bool LoopVersioningLICM::isLoopAlreadyVisited() {
446 // Check LoopVersioningLICM metadata into loop
447 if (findStringMetadataForLoop(TheLoop: CurLoop, Name: LICMVersioningMetaData)) {
448 return true;
449 }
450 return false;
451}
452
453/// Checks legality for LoopVersioningLICM by considering following:
454/// a) loop structure legality b) loop instruction legality
455/// c) loop memory access legality.
456/// Return true if legal else returns false.
457bool LoopVersioningLICM::isLegalForVersioning() {
458 using namespace ore;
459 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop);
460 // Make sure not re-visiting same loop again.
461 if (isLoopAlreadyVisited()) {
462 LLVM_DEBUG(
463 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
464 return false;
465 }
466 // Check loop structure leagality.
467 if (!legalLoopStructure()) {
468 LLVM_DEBUG(
469 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
470 ORE->emit(RemarkBuilder: [&]() {
471 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct",
472 CurLoop->getStartLoc(),
473 CurLoop->getHeader())
474 << " Unsafe Loop structure";
475 });
476 return false;
477 }
478 // Check loop instruction leagality.
479 if (!legalLoopInstructions()) {
480 LLVM_DEBUG(
481 dbgs()
482 << " Loop instructions not suitable for LoopVersioningLICM\n\n");
483 return false;
484 }
485 // Check loop memory access leagality.
486 if (!legalLoopMemoryAccesses()) {
487 LLVM_DEBUG(
488 dbgs()
489 << " Loop memory access not suitable for LoopVersioningLICM\n\n");
490 ORE->emit(RemarkBuilder: [&]() {
491 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess",
492 CurLoop->getStartLoc(),
493 CurLoop->getHeader())
494 << " Unsafe Loop memory access";
495 });
496 return false;
497 }
498 // Loop versioning is feasible, return true.
499 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
500 ORE->emit(RemarkBuilder: [&]() {
501 return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning",
502 CurLoop->getStartLoc(), CurLoop->getHeader())
503 << " Versioned loop for LICM."
504 << " Number of runtime checks we had to insert "
505 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks());
506 });
507 return true;
508}
509
510bool LoopVersioningLICM::run(DominatorTree *DT) {
511 // Do not do the transformation if disabled by metadata.
512 if (hasLICMVersioningTransformation(L: CurLoop) & TM_Disable)
513 return false;
514
515 bool Changed = false;
516
517 // Check feasiblity of LoopVersioningLICM.
518 // If versioning found to be feasible and beneficial then proceed
519 // else simply return, by cleaning up memory.
520 if (isLegalForVersioning()) {
521 // Do loop versioning.
522 // Create memcheck for memory accessed inside loop.
523 // Clone original loop, and set blocks properly.
524 LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
525 CurLoop, &LI, DT, SE);
526 LVer.versionLoop();
527 // Set Loop Versioning metaData for original loop.
528 addStringMetadataToLoop(TheLoop: LVer.getNonVersionedLoop(), MDString: LICMVersioningMetaData);
529 // Set Loop Versioning metaData for version loop.
530 addStringMetadataToLoop(TheLoop: LVer.getVersionedLoop(), MDString: LICMVersioningMetaData);
531 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
532 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
533 // instructions, not loops.
534 addStringMetadataToLoop(TheLoop: LVer.getVersionedLoop(),
535 MDString: "llvm.mem.parallel_loop_access");
536 // Update version loop with aggressive aliasing assumption.
537 LVer.annotateLoopWithNoAlias();
538 Changed = true;
539 }
540 return Changed;
541}
542
543namespace llvm {
544
545PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
546 LoopStandardAnalysisResults &LAR,
547 LPMUpdater &U) {
548 AliasAnalysis *AA = &LAR.AA;
549 ScalarEvolution *SE = &LAR.SE;
550 DominatorTree *DT = &LAR.DT;
551 const Function *F = L.getHeader()->getParent();
552 OptimizationRemarkEmitter ORE(F);
553
554 LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
555 if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
556 return PreservedAnalyses::all();
557 return getLoopPassPreservedAnalyses();
558}
559} // namespace llvm
560