| 1 | //===- LoopVersioning.cpp - Utility to version a loop ---------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines a utility class to perform loop versioning. The versioned |
| 10 | // loop speculates that otherwise may-aliasing memory accesses don't overlap and |
| 11 | // emits checks to prove this. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/Transforms/Utils/LoopVersioning.h" |
| 16 | #include "llvm/ADT/ArrayRef.h" |
| 17 | #include "llvm/Analysis/AliasAnalysis.h" |
| 18 | #include "llvm/Analysis/InstSimplifyFolder.h" |
| 19 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
| 20 | #include "llvm/Analysis/LoopInfo.h" |
| 21 | #include "llvm/Analysis/ScalarEvolution.h" |
| 22 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 23 | #include "llvm/IR/Dominators.h" |
| 24 | #include "llvm/IR/MDBuilder.h" |
| 25 | #include "llvm/IR/PassManager.h" |
| 26 | #include "llvm/Support/CommandLine.h" |
| 27 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 28 | #include "llvm/Transforms/Utils/Cloning.h" |
| 29 | #include "llvm/Transforms/Utils/LoopUtils.h" |
| 30 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | #define DEBUG_TYPE "loop-versioning" |
| 35 | |
| 36 | static cl::opt<bool> |
| 37 | AnnotateNoAlias("loop-version-annotate-no-alias" , cl::init(Val: true), |
| 38 | cl::Hidden, |
| 39 | cl::desc("Add no-alias annotation for instructions that " |
| 40 | "are disambiguated by memchecks" )); |
| 41 | |
| 42 | LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, |
| 43 | ArrayRef<RuntimePointerCheck> Checks, Loop *L, |
| 44 | LoopInfo *LI, DominatorTree *DT, |
| 45 | ScalarEvolution *SE) |
| 46 | : VersionedLoop(L), AliasChecks(Checks), Preds(LAI.getPSE().getPredicate()), |
| 47 | LAI(LAI), LI(LI), DT(DT), SE(SE) {} |
| 48 | |
| 49 | void LoopVersioning::versionLoop( |
| 50 | const SmallVectorImpl<Instruction *> &DefsUsedOutside) { |
| 51 | assert(VersionedLoop->getUniqueExitBlock() && "No single exit block" ); |
| 52 | assert(VersionedLoop->isLoopSimplifyForm() && |
| 53 | "Loop is not in loop-simplify form" ); |
| 54 | |
| 55 | Value *MemRuntimeCheck; |
| 56 | Value *SCEVRuntimeCheck; |
| 57 | Value *RuntimeCheck = nullptr; |
| 58 | |
| 59 | // Add the memcheck in the original preheader (this is empty initially). |
| 60 | BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); |
| 61 | const auto &RtPtrChecking = *LAI.getRuntimePointerChecking(); |
| 62 | |
| 63 | SCEVExpander Exp2(*RtPtrChecking.getSE(), |
| 64 | VersionedLoop->getHeader()->getDataLayout(), |
| 65 | "induction" ); |
| 66 | MemRuntimeCheck = addRuntimeChecks(Loc: RuntimeCheckBB->getTerminator(), |
| 67 | TheLoop: VersionedLoop, PointerChecks: AliasChecks, Expander&: Exp2); |
| 68 | |
| 69 | SCEVExpander Exp(*SE, RuntimeCheckBB->getDataLayout(), |
| 70 | "scev.check" ); |
| 71 | SCEVRuntimeCheck = |
| 72 | Exp.expandCodeForPredicate(Pred: &Preds, Loc: RuntimeCheckBB->getTerminator()); |
| 73 | |
| 74 | IRBuilder<InstSimplifyFolder> Builder( |
| 75 | RuntimeCheckBB->getContext(), |
| 76 | InstSimplifyFolder(RuntimeCheckBB->getDataLayout())); |
| 77 | if (MemRuntimeCheck && SCEVRuntimeCheck) { |
| 78 | Builder.SetInsertPoint(RuntimeCheckBB->getTerminator()); |
| 79 | RuntimeCheck = |
| 80 | Builder.CreateOr(LHS: MemRuntimeCheck, RHS: SCEVRuntimeCheck, Name: "lver.safe" ); |
| 81 | } else |
| 82 | RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck; |
| 83 | |
| 84 | assert(RuntimeCheck && "called even though we don't need " |
| 85 | "any runtime checks" ); |
| 86 | |
| 87 | // Rename the block to make the IR more readable. |
| 88 | RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() + |
| 89 | ".lver.check" ); |
| 90 | |
| 91 | // Create empty preheader for the loop (and after cloning for the |
| 92 | // non-versioned loop). |
| 93 | BasicBlock *PH = |
| 94 | SplitBlock(Old: RuntimeCheckBB, SplitPt: RuntimeCheckBB->getTerminator(), DT, LI, |
| 95 | MSSAU: nullptr, BBName: VersionedLoop->getHeader()->getName() + ".ph" ); |
| 96 | |
| 97 | // Clone the loop including the preheader. |
| 98 | // |
| 99 | // FIXME: This does not currently preserve SimplifyLoop because the exit |
| 100 | // block is a join between the two loops. |
| 101 | SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; |
| 102 | NonVersionedLoop = |
| 103 | cloneLoopWithPreheader(Before: PH, LoopDomBB: RuntimeCheckBB, OrigLoop: VersionedLoop, VMap, |
| 104 | NameSuffix: ".lver.orig" , LI, DT, Blocks&: NonVersionedLoopBlocks); |
| 105 | remapInstructionsInBlocks(Blocks: NonVersionedLoopBlocks, VMap); |
| 106 | |
| 107 | // Insert the conditional branch based on the result of the memchecks. |
| 108 | Instruction *OrigTerm = RuntimeCheckBB->getTerminator(); |
| 109 | Builder.SetInsertPoint(OrigTerm); |
| 110 | Builder.CreateCondBr(Cond: RuntimeCheck, True: NonVersionedLoop->getLoopPreheader(), |
| 111 | False: VersionedLoop->getLoopPreheader()); |
| 112 | OrigTerm->eraseFromParent(); |
| 113 | |
| 114 | // The loops merge in the original exit block. This is now dominated by the |
| 115 | // memchecking block. |
| 116 | DT->changeImmediateDominator(BB: VersionedLoop->getExitBlock(), NewBB: RuntimeCheckBB); |
| 117 | |
| 118 | // Adds the necessary PHI nodes for the versioned loops based on the |
| 119 | // loop-defined values used outside of the loop. |
| 120 | addPHINodes(DefsUsedOutside); |
| 121 | formDedicatedExitBlocks(L: NonVersionedLoop, DT, LI, MSSAU: nullptr, PreserveLCSSA: true); |
| 122 | formDedicatedExitBlocks(L: VersionedLoop, DT, LI, MSSAU: nullptr, PreserveLCSSA: true); |
| 123 | assert(NonVersionedLoop->isLoopSimplifyForm() && |
| 124 | VersionedLoop->isLoopSimplifyForm() && |
| 125 | "The versioned loops should be in simplify form." ); |
| 126 | } |
| 127 | |
| 128 | void LoopVersioning::addPHINodes( |
| 129 | const SmallVectorImpl<Instruction *> &DefsUsedOutside) { |
| 130 | BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); |
| 131 | assert(PHIBlock && "No single successor to loop exit block" ); |
| 132 | PHINode *PN; |
| 133 | |
| 134 | // First add a single-operand PHI for each DefsUsedOutside if one does not |
| 135 | // exists yet. |
| 136 | for (auto *Inst : DefsUsedOutside) { |
| 137 | // See if we have a single-operand PHI with the value defined by the |
| 138 | // original loop. |
| 139 | for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(Val&: I)); ++I) { |
| 140 | if (PN->getIncomingValue(i: 0) == Inst) { |
| 141 | SE->forgetLcssaPhiWithNewPredecessor(L: VersionedLoop, V: PN); |
| 142 | break; |
| 143 | } |
| 144 | } |
| 145 | // If not create it. |
| 146 | if (!PN) { |
| 147 | PN = PHINode::Create(Ty: Inst->getType(), NumReservedValues: 2, NameStr: Inst->getName() + ".lver" ); |
| 148 | PN->insertBefore(InsertPos: PHIBlock->begin()); |
| 149 | SmallVector<User*, 8> UsersToUpdate; |
| 150 | for (User *U : Inst->users()) |
| 151 | if (!VersionedLoop->contains(BB: cast<Instruction>(Val: U)->getParent())) |
| 152 | UsersToUpdate.push_back(Elt: U); |
| 153 | for (User *U : UsersToUpdate) |
| 154 | U->replaceUsesOfWith(From: Inst, To: PN); |
| 155 | PN->addIncoming(V: Inst, BB: VersionedLoop->getExitingBlock()); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | // Then for each PHI add the operand for the edge from the cloned loop. |
| 160 | for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(Val&: I)); ++I) { |
| 161 | assert(PN->getNumOperands() == 1 && |
| 162 | "Exit block should only have on predecessor" ); |
| 163 | |
| 164 | // If the definition was cloned used that otherwise use the same value. |
| 165 | Value *ClonedValue = PN->getIncomingValue(i: 0); |
| 166 | auto Mapped = VMap.find(Val: ClonedValue); |
| 167 | if (Mapped != VMap.end()) |
| 168 | ClonedValue = Mapped->second; |
| 169 | |
| 170 | PN->addIncoming(V: ClonedValue, BB: NonVersionedLoop->getExitingBlock()); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | void LoopVersioning::prepareNoAliasMetadata() { |
| 175 | // We need to turn the no-alias relation between pointer checking groups into |
| 176 | // no-aliasing annotations between instructions. |
| 177 | // |
| 178 | // We accomplish this by mapping each pointer checking group (a set of |
| 179 | // pointers memchecked together) to an alias scope and then also mapping each |
| 180 | // group to the list of scopes it can't alias. |
| 181 | |
| 182 | const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking(); |
| 183 | LLVMContext &Context = VersionedLoop->getHeader()->getContext(); |
| 184 | |
| 185 | // First allocate an aliasing scope for each pointer checking group. |
| 186 | // |
| 187 | // While traversing through the checking groups in the loop, also create a |
| 188 | // reverse map from pointers to the pointer checking group they were assigned |
| 189 | // to. |
| 190 | MDBuilder MDB(Context); |
| 191 | MDNode *Domain = MDB.createAnonymousAliasScopeDomain(Name: "LVerDomain" ); |
| 192 | |
| 193 | for (const auto &Group : RtPtrChecking->CheckingGroups) { |
| 194 | GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain); |
| 195 | |
| 196 | for (unsigned PtrIdx : Group.Members) |
| 197 | PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group; |
| 198 | } |
| 199 | |
| 200 | // Go through the checks and for each pointer group, collect the scopes for |
| 201 | // each non-aliasing pointer group. |
| 202 | DenseMap<const RuntimeCheckingPtrGroup *, SmallVector<Metadata *, 4>> |
| 203 | GroupToNonAliasingScopes; |
| 204 | |
| 205 | for (const auto &Check : AliasChecks) |
| 206 | GroupToNonAliasingScopes[Check.first].push_back(Elt: GroupToScope[Check.second]); |
| 207 | |
| 208 | // Finally, transform the above to actually map to scope list which is what |
| 209 | // the metadata uses. |
| 210 | |
| 211 | for (const auto &Pair : GroupToNonAliasingScopes) |
| 212 | GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, MDs: Pair.second); |
| 213 | } |
| 214 | |
| 215 | void LoopVersioning::annotateLoopWithNoAlias() { |
| 216 | if (!AnnotateNoAlias) |
| 217 | return; |
| 218 | |
| 219 | // First prepare the maps. |
| 220 | prepareNoAliasMetadata(); |
| 221 | |
| 222 | // Add the scope and no-alias metadata to the instructions. |
| 223 | for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) { |
| 224 | annotateInstWithNoAlias(I); |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | std::pair<MDNode *, MDNode *> |
| 229 | LoopVersioning::getNoAliasMetadataFor(const Instruction *OrigInst) const { |
| 230 | if (!AnnotateNoAlias) |
| 231 | return {nullptr, nullptr}; |
| 232 | |
| 233 | LLVMContext &Context = VersionedLoop->getHeader()->getContext(); |
| 234 | const Value *Ptr = isa<LoadInst>(Val: OrigInst) |
| 235 | ? cast<LoadInst>(Val: OrigInst)->getPointerOperand() |
| 236 | : cast<StoreInst>(Val: OrigInst)->getPointerOperand(); |
| 237 | |
| 238 | MDNode *AliasScope = nullptr; |
| 239 | MDNode *NoAlias = nullptr; |
| 240 | // Find the group for the pointer and then add the scope metadata. |
| 241 | auto Group = PtrToGroup.find(Val: Ptr); |
| 242 | if (Group != PtrToGroup.end()) { |
| 243 | AliasScope = MDNode::concatenate( |
| 244 | A: OrigInst->getMetadata(KindID: LLVMContext::MD_alias_scope), |
| 245 | B: MDNode::get(Context, MDs: GroupToScope.lookup(Val: Group->second))); |
| 246 | |
| 247 | // Add the no-alias metadata. |
| 248 | auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Val: Group->second); |
| 249 | if (NonAliasingScopeList != GroupToNonAliasingScopeList.end()) |
| 250 | NoAlias = |
| 251 | MDNode::concatenate(A: OrigInst->getMetadata(KindID: LLVMContext::MD_noalias), |
| 252 | B: NonAliasingScopeList->second); |
| 253 | } |
| 254 | return {AliasScope, NoAlias}; |
| 255 | } |
| 256 | |
| 257 | void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, |
| 258 | const Instruction *OrigInst) { |
| 259 | const auto &[AliasScopeMD, NoAliasMD] = getNoAliasMetadataFor(OrigInst); |
| 260 | if (AliasScopeMD) |
| 261 | VersionedInst->setMetadata(KindID: LLVMContext::MD_alias_scope, Node: AliasScopeMD); |
| 262 | |
| 263 | if (NoAliasMD) |
| 264 | VersionedInst->setMetadata(KindID: LLVMContext::MD_noalias, Node: NoAliasMD); |
| 265 | } |
| 266 | |
| 267 | namespace { |
| 268 | bool runImpl(LoopInfo *LI, LoopAccessInfoManager &LAIs, DominatorTree *DT, |
| 269 | ScalarEvolution *SE) { |
| 270 | // Build up a worklist of inner-loops to version. This is necessary as the |
| 271 | // act of versioning a loop creates new loops and can invalidate iterators |
| 272 | // across the loops. |
| 273 | SmallVector<Loop *, 8> Worklist; |
| 274 | |
| 275 | for (Loop *TopLevelLoop : *LI) |
| 276 | for (Loop *L : depth_first(G: TopLevelLoop)) |
| 277 | // We only handle inner-most loops. |
| 278 | if (L->isInnermost()) |
| 279 | Worklist.push_back(Elt: L); |
| 280 | |
| 281 | // Now walk the identified inner loops. |
| 282 | bool Changed = false; |
| 283 | for (Loop *L : Worklist) { |
| 284 | if (!L->isLoopSimplifyForm() || !L->isRotatedForm() || |
| 285 | !L->getExitingBlock()) |
| 286 | continue; |
| 287 | const LoopAccessInfo &LAI = LAIs.getInfo(L&: *L); |
| 288 | if (!LAI.hasConvergentOp() && |
| 289 | (LAI.getNumRuntimePointerChecks() || |
| 290 | !LAI.getPSE().getPredicate().isAlwaysTrue())) { |
| 291 | if (!L->isLCSSAForm(DT: *DT)) |
| 292 | formLCSSARecursively(L&: *L, DT: *DT, LI, SE); |
| 293 | |
| 294 | LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, |
| 295 | LI, DT, SE); |
| 296 | LVer.versionLoop(); |
| 297 | LVer.annotateLoopWithNoAlias(); |
| 298 | Changed = true; |
| 299 | LAIs.clear(); |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | return Changed; |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | PreservedAnalyses LoopVersioningPass::run(Function &F, |
| 308 | FunctionAnalysisManager &AM) { |
| 309 | auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F); |
| 310 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
| 311 | LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(IR&: F); |
| 312 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 313 | |
| 314 | if (runImpl(LI: &LI, LAIs, DT: &DT, SE: &SE)) |
| 315 | return PreservedAnalyses::none(); |
| 316 | return PreservedAnalyses::all(); |
| 317 | } |
| 318 | |