| 1 | //===- CodeExtractor.cpp - Pull code region into a new function -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the interface to tear out a code region, such as an |
| 10 | // individual loop or a parallel section, into a new function, replacing it with |
| 11 | // a call to the new function. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/Transforms/Utils/CodeExtractor.h" |
| 16 | #include "llvm/ADT/ArrayRef.h" |
| 17 | #include "llvm/ADT/DenseMap.h" |
| 18 | #include "llvm/ADT/STLExtras.h" |
| 19 | #include "llvm/ADT/SetVector.h" |
| 20 | #include "llvm/ADT/SmallPtrSet.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/Analysis/AssumptionCache.h" |
| 23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 24 | #include "llvm/Analysis/BlockFrequencyInfoImpl.h" |
| 25 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
| 26 | #include "llvm/IR/Argument.h" |
| 27 | #include "llvm/IR/Attributes.h" |
| 28 | #include "llvm/IR/BasicBlock.h" |
| 29 | #include "llvm/IR/CFG.h" |
| 30 | #include "llvm/IR/Constant.h" |
| 31 | #include "llvm/IR/Constants.h" |
| 32 | #include "llvm/IR/DIBuilder.h" |
| 33 | #include "llvm/IR/DataLayout.h" |
| 34 | #include "llvm/IR/DebugInfo.h" |
| 35 | #include "llvm/IR/DebugInfoMetadata.h" |
| 36 | #include "llvm/IR/DerivedTypes.h" |
| 37 | #include "llvm/IR/Dominators.h" |
| 38 | #include "llvm/IR/Function.h" |
| 39 | #include "llvm/IR/GlobalValue.h" |
| 40 | #include "llvm/IR/InstIterator.h" |
| 41 | #include "llvm/IR/InstrTypes.h" |
| 42 | #include "llvm/IR/Instruction.h" |
| 43 | #include "llvm/IR/Instructions.h" |
| 44 | #include "llvm/IR/IntrinsicInst.h" |
| 45 | #include "llvm/IR/Intrinsics.h" |
| 46 | #include "llvm/IR/LLVMContext.h" |
| 47 | #include "llvm/IR/MDBuilder.h" |
| 48 | #include "llvm/IR/Module.h" |
| 49 | #include "llvm/IR/PatternMatch.h" |
| 50 | #include "llvm/IR/Type.h" |
| 51 | #include "llvm/IR/User.h" |
| 52 | #include "llvm/IR/Value.h" |
| 53 | #include "llvm/IR/Verifier.h" |
| 54 | #include "llvm/Support/BlockFrequency.h" |
| 55 | #include "llvm/Support/BranchProbability.h" |
| 56 | #include "llvm/Support/Casting.h" |
| 57 | #include "llvm/Support/CommandLine.h" |
| 58 | #include "llvm/Support/Debug.h" |
| 59 | #include "llvm/Support/ErrorHandling.h" |
| 60 | #include "llvm/Support/raw_ostream.h" |
| 61 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 62 | #include <cassert> |
| 63 | #include <cstdint> |
| 64 | #include <iterator> |
| 65 | #include <map> |
| 66 | #include <utility> |
| 67 | #include <vector> |
| 68 | |
| 69 | using namespace llvm; |
| 70 | using namespace llvm::PatternMatch; |
| 71 | using ProfileCount = Function::ProfileCount; |
| 72 | |
| 73 | #define DEBUG_TYPE "code-extractor" |
| 74 | |
| 75 | // Provide a command-line option to aggregate function arguments into a struct |
| 76 | // for functions produced by the code extractor. This is useful when converting |
| 77 | // extracted functions to pthread-based code, as only one argument (void*) can |
| 78 | // be passed in to pthread_create(). |
| 79 | static cl::opt<bool> |
| 80 | AggregateArgsOpt("aggregate-extracted-args" , cl::Hidden, |
| 81 | cl::desc("Aggregate arguments to code-extracted functions" )); |
| 82 | |
| 83 | /// Test whether a block is valid for extraction. |
| 84 | static bool (const BasicBlock &BB, |
| 85 | const SetVector<BasicBlock *> &Result, |
| 86 | bool AllowVarArgs, bool AllowAlloca) { |
| 87 | // taking the address of a basic block moved to another function is illegal |
| 88 | if (BB.hasAddressTaken()) |
| 89 | return false; |
| 90 | |
| 91 | // don't hoist code that uses another basicblock address, as it's likely to |
| 92 | // lead to unexpected behavior, like cross-function jumps |
| 93 | SmallPtrSet<User const *, 16> Visited; |
| 94 | SmallVector<User const *, 16> ToVisit(llvm::make_pointer_range(Range: BB)); |
| 95 | |
| 96 | while (!ToVisit.empty()) { |
| 97 | User const *Curr = ToVisit.pop_back_val(); |
| 98 | if (!Visited.insert(Ptr: Curr).second) |
| 99 | continue; |
| 100 | if (isa<BlockAddress const>(Val: Curr)) |
| 101 | return false; // even a reference to self is likely to be not compatible |
| 102 | |
| 103 | if (isa<Instruction>(Val: Curr) && cast<Instruction>(Val: Curr)->getParent() != &BB) |
| 104 | continue; |
| 105 | |
| 106 | for (auto const &U : Curr->operands()) { |
| 107 | if (auto *UU = dyn_cast<User>(Val: U)) |
| 108 | ToVisit.push_back(Elt: UU); |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | // If explicitly requested, allow vastart and alloca. For invoke instructions |
| 113 | // verify that extraction is valid. |
| 114 | for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { |
| 115 | if (isa<AllocaInst>(Val: I)) { |
| 116 | if (!AllowAlloca) |
| 117 | return false; |
| 118 | continue; |
| 119 | } |
| 120 | |
| 121 | if (const auto *II = dyn_cast<InvokeInst>(Val&: I)) { |
| 122 | // Unwind destination (either a landingpad, catchswitch, or cleanuppad) |
| 123 | // must be a part of the subgraph which is being extracted. |
| 124 | if (auto *UBB = II->getUnwindDest()) |
| 125 | if (!Result.count(key: UBB)) |
| 126 | return false; |
| 127 | continue; |
| 128 | } |
| 129 | |
| 130 | // All catch handlers of a catchswitch instruction as well as the unwind |
| 131 | // destination must be in the subgraph. |
| 132 | if (const auto *CSI = dyn_cast<CatchSwitchInst>(Val&: I)) { |
| 133 | if (auto *UBB = CSI->getUnwindDest()) |
| 134 | if (!Result.count(key: UBB)) |
| 135 | return false; |
| 136 | for (const auto *HBB : CSI->handlers()) |
| 137 | if (!Result.count(key: const_cast<BasicBlock*>(HBB))) |
| 138 | return false; |
| 139 | continue; |
| 140 | } |
| 141 | |
| 142 | // Make sure that entire catch handler is within subgraph. It is sufficient |
| 143 | // to check that catch return's block is in the list. |
| 144 | if (const auto *CPI = dyn_cast<CatchPadInst>(Val&: I)) { |
| 145 | for (const auto *U : CPI->users()) |
| 146 | if (const auto *CRI = dyn_cast<CatchReturnInst>(Val: U)) |
| 147 | if (!Result.count(key: const_cast<BasicBlock*>(CRI->getParent()))) |
| 148 | return false; |
| 149 | continue; |
| 150 | } |
| 151 | |
| 152 | // And do similar checks for cleanup handler - the entire handler must be |
| 153 | // in subgraph which is going to be extracted. For cleanup return should |
| 154 | // additionally check that the unwind destination is also in the subgraph. |
| 155 | if (const auto *CPI = dyn_cast<CleanupPadInst>(Val&: I)) { |
| 156 | for (const auto *U : CPI->users()) |
| 157 | if (const auto *CRI = dyn_cast<CleanupReturnInst>(Val: U)) |
| 158 | if (!Result.count(key: const_cast<BasicBlock*>(CRI->getParent()))) |
| 159 | return false; |
| 160 | continue; |
| 161 | } |
| 162 | if (const auto *CRI = dyn_cast<CleanupReturnInst>(Val&: I)) { |
| 163 | if (auto *UBB = CRI->getUnwindDest()) |
| 164 | if (!Result.count(key: UBB)) |
| 165 | return false; |
| 166 | continue; |
| 167 | } |
| 168 | |
| 169 | if (const CallInst *CI = dyn_cast<CallInst>(Val&: I)) { |
| 170 | // musttail calls have several restrictions, generally enforcing matching |
| 171 | // calling conventions between the caller parent and musttail callee. |
| 172 | // We can't usually honor them, because the extracted function has a |
| 173 | // different signature altogether, taking inputs/outputs and returning |
| 174 | // a control-flow identifier rather than the actual return value. |
| 175 | if (CI->isMustTailCall()) |
| 176 | return false; |
| 177 | |
| 178 | if (const Function *F = CI->getCalledFunction()) { |
| 179 | auto IID = F->getIntrinsicID(); |
| 180 | if (IID == Intrinsic::vastart) { |
| 181 | if (AllowVarArgs) |
| 182 | continue; |
| 183 | else |
| 184 | return false; |
| 185 | } |
| 186 | |
| 187 | // Currently, we miscompile outlined copies of eh_typid_for. There are |
| 188 | // proposals for fixing this in llvm.org/PR39545. |
| 189 | if (IID == Intrinsic::eh_typeid_for) |
| 190 | return false; |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | return true; |
| 196 | } |
| 197 | |
| 198 | /// Build a set of blocks to extract if the input blocks are viable. |
| 199 | static SetVector<BasicBlock *> |
| 200 | (ArrayRef<BasicBlock *> BBs, DominatorTree *DT, |
| 201 | bool AllowVarArgs, bool AllowAlloca) { |
| 202 | assert(!BBs.empty() && "The set of blocks to extract must be non-empty" ); |
| 203 | SetVector<BasicBlock *> Result; |
| 204 | |
| 205 | // Loop over the blocks, adding them to our set-vector, and aborting with an |
| 206 | // empty set if we encounter invalid blocks. |
| 207 | for (BasicBlock *BB : BBs) { |
| 208 | // If this block is dead, don't process it. |
| 209 | if (DT && !DT->isReachableFromEntry(A: BB)) |
| 210 | continue; |
| 211 | |
| 212 | if (!Result.insert(X: BB)) |
| 213 | llvm_unreachable("Repeated basic blocks in extraction input" ); |
| 214 | } |
| 215 | |
| 216 | LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName() |
| 217 | << '\n'); |
| 218 | |
| 219 | for (auto *BB : Result) { |
| 220 | if (!isBlockValidForExtraction(BB: *BB, Result, AllowVarArgs, AllowAlloca)) |
| 221 | return {}; |
| 222 | |
| 223 | // Make sure that the first block is not a landing pad. |
| 224 | if (BB == Result.front()) { |
| 225 | if (BB->isEHPad()) { |
| 226 | LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n" ); |
| 227 | return {}; |
| 228 | } |
| 229 | continue; |
| 230 | } |
| 231 | |
| 232 | // All blocks other than the first must not have predecessors outside of |
| 233 | // the subgraph which is being extracted. |
| 234 | for (auto *PBB : predecessors(BB)) |
| 235 | if (!Result.count(key: PBB)) { |
| 236 | LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from " |
| 237 | "outside the region except for the first block!\n" |
| 238 | << "Problematic source BB: " << BB->getName() << "\n" |
| 239 | << "Problematic destination BB: " << PBB->getName() |
| 240 | << "\n" ); |
| 241 | return {}; |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | return Result; |
| 246 | } |
| 247 | |
| 248 | /// isAlignmentPreservedForAddrCast - Return true if the cast operation |
| 249 | /// for specified target preserves original alignment |
| 250 | static bool isAlignmentPreservedForAddrCast(const Triple &TargetTriple) { |
| 251 | switch (TargetTriple.getArch()) { |
| 252 | case Triple::ArchType::amdgcn: |
| 253 | case Triple::ArchType::r600: |
| 254 | return true; |
| 255 | // TODO: Add other architectures for which we are certain that alignment |
| 256 | // is preserved during address space cast operations. |
| 257 | default: |
| 258 | return false; |
| 259 | } |
| 260 | return false; |
| 261 | } |
| 262 | |
| 263 | CodeExtractor::(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, |
| 264 | bool AggregateArgs, BlockFrequencyInfo *BFI, |
| 265 | BranchProbabilityInfo *BPI, AssumptionCache *AC, |
| 266 | bool AllowVarArgs, bool AllowAlloca, |
| 267 | BasicBlock *AllocationBlock, std::string Suffix, |
| 268 | bool ArgsInZeroAddressSpace) |
| 269 | : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), |
| 270 | BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), |
| 271 | AllowVarArgs(AllowVarArgs), |
| 272 | Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), |
| 273 | Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} |
| 274 | |
| 275 | /// definedInRegion - Return true if the specified value is defined in the |
| 276 | /// extracted region. |
| 277 | static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { |
| 278 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) |
| 279 | if (Blocks.count(key: I->getParent())) |
| 280 | return true; |
| 281 | return false; |
| 282 | } |
| 283 | |
| 284 | /// definedInCaller - Return true if the specified value is defined in the |
| 285 | /// function being code extracted, but not in the region being extracted. |
| 286 | /// These values must be passed in as live-ins to the function. |
| 287 | static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { |
| 288 | if (isa<Argument>(Val: V)) return true; |
| 289 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) |
| 290 | if (!Blocks.count(key: I->getParent())) |
| 291 | return true; |
| 292 | return false; |
| 293 | } |
| 294 | |
| 295 | static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { |
| 296 | BasicBlock *CommonExitBlock = nullptr; |
| 297 | auto hasNonCommonExitSucc = [&](BasicBlock *Block) { |
| 298 | for (auto *Succ : successors(BB: Block)) { |
| 299 | // Internal edges, ok. |
| 300 | if (Blocks.count(key: Succ)) |
| 301 | continue; |
| 302 | if (!CommonExitBlock) { |
| 303 | CommonExitBlock = Succ; |
| 304 | continue; |
| 305 | } |
| 306 | if (CommonExitBlock != Succ) |
| 307 | return true; |
| 308 | } |
| 309 | return false; |
| 310 | }; |
| 311 | |
| 312 | if (any_of(Range: Blocks, P: hasNonCommonExitSucc)) |
| 313 | return nullptr; |
| 314 | |
| 315 | return CommonExitBlock; |
| 316 | } |
| 317 | |
| 318 | CodeExtractorAnalysisCache::(Function &F) { |
| 319 | for (BasicBlock &BB : F) { |
| 320 | for (Instruction &II : BB.instructionsWithoutDebug()) |
| 321 | if (auto *AI = dyn_cast<AllocaInst>(Val: &II)) |
| 322 | Allocas.push_back(Elt: AI); |
| 323 | |
| 324 | findSideEffectInfoForBlock(BB); |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | void CodeExtractorAnalysisCache::(BasicBlock &BB) { |
| 329 | for (Instruction &II : BB.instructionsWithoutDebug()) { |
| 330 | unsigned Opcode = II.getOpcode(); |
| 331 | Value *MemAddr = nullptr; |
| 332 | switch (Opcode) { |
| 333 | case Instruction::Store: |
| 334 | case Instruction::Load: { |
| 335 | if (Opcode == Instruction::Store) { |
| 336 | StoreInst *SI = cast<StoreInst>(Val: &II); |
| 337 | MemAddr = SI->getPointerOperand(); |
| 338 | } else { |
| 339 | LoadInst *LI = cast<LoadInst>(Val: &II); |
| 340 | MemAddr = LI->getPointerOperand(); |
| 341 | } |
| 342 | // Global variable can not be aliased with locals. |
| 343 | if (isa<Constant>(Val: MemAddr)) |
| 344 | break; |
| 345 | Value *Base = MemAddr->stripInBoundsConstantOffsets(); |
| 346 | if (!isa<AllocaInst>(Val: Base)) { |
| 347 | SideEffectingBlocks.insert(V: &BB); |
| 348 | return; |
| 349 | } |
| 350 | BaseMemAddrs[&BB].insert(V: Base); |
| 351 | break; |
| 352 | } |
| 353 | default: { |
| 354 | IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(Val: &II); |
| 355 | if (IntrInst) { |
| 356 | if (IntrInst->isLifetimeStartOrEnd()) |
| 357 | break; |
| 358 | SideEffectingBlocks.insert(V: &BB); |
| 359 | return; |
| 360 | } |
| 361 | // Treat all the other cases conservatively if it has side effects. |
| 362 | if (II.mayHaveSideEffects()) { |
| 363 | SideEffectingBlocks.insert(V: &BB); |
| 364 | return; |
| 365 | } |
| 366 | } |
| 367 | } |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | bool CodeExtractorAnalysisCache::( |
| 372 | BasicBlock &BB, AllocaInst *Addr) const { |
| 373 | if (SideEffectingBlocks.count(V: &BB)) |
| 374 | return true; |
| 375 | auto It = BaseMemAddrs.find(Val: &BB); |
| 376 | if (It != BaseMemAddrs.end()) |
| 377 | return It->second.count(V: Addr); |
| 378 | return false; |
| 379 | } |
| 380 | |
| 381 | bool CodeExtractor::( |
| 382 | const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const { |
| 383 | AllocaInst *AI = cast<AllocaInst>(Val: Addr->stripInBoundsConstantOffsets()); |
| 384 | Function *Func = (*Blocks.begin())->getParent(); |
| 385 | for (BasicBlock &BB : *Func) { |
| 386 | if (Blocks.count(key: &BB)) |
| 387 | continue; |
| 388 | if (CEAC.doesBlockContainClobberOfAddr(BB, Addr: AI)) |
| 389 | return false; |
| 390 | } |
| 391 | return true; |
| 392 | } |
| 393 | |
| 394 | BasicBlock * |
| 395 | CodeExtractor::(BasicBlock *CommonExitBlock) { |
| 396 | BasicBlock *SinglePredFromOutlineRegion = nullptr; |
| 397 | assert(!Blocks.count(CommonExitBlock) && |
| 398 | "Expect a block outside the region!" ); |
| 399 | for (auto *Pred : predecessors(BB: CommonExitBlock)) { |
| 400 | if (!Blocks.count(key: Pred)) |
| 401 | continue; |
| 402 | if (!SinglePredFromOutlineRegion) { |
| 403 | SinglePredFromOutlineRegion = Pred; |
| 404 | } else if (SinglePredFromOutlineRegion != Pred) { |
| 405 | SinglePredFromOutlineRegion = nullptr; |
| 406 | break; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | if (SinglePredFromOutlineRegion) |
| 411 | return SinglePredFromOutlineRegion; |
| 412 | |
| 413 | #ifndef NDEBUG |
| 414 | auto getFirstPHI = [](BasicBlock *BB) { |
| 415 | BasicBlock::iterator I = BB->begin(); |
| 416 | PHINode *FirstPhi = nullptr; |
| 417 | while (I != BB->end()) { |
| 418 | PHINode *Phi = dyn_cast<PHINode>(I); |
| 419 | if (!Phi) |
| 420 | break; |
| 421 | if (!FirstPhi) { |
| 422 | FirstPhi = Phi; |
| 423 | break; |
| 424 | } |
| 425 | } |
| 426 | return FirstPhi; |
| 427 | }; |
| 428 | // If there are any phi nodes, the single pred either exists or has already |
| 429 | // be created before code extraction. |
| 430 | assert(!getFirstPHI(CommonExitBlock) && "Phi not expected" ); |
| 431 | #endif |
| 432 | |
| 433 | BasicBlock *NewExitBlock = |
| 434 | CommonExitBlock->splitBasicBlock(I: CommonExitBlock->getFirstNonPHIIt()); |
| 435 | |
| 436 | for (BasicBlock *Pred : |
| 437 | llvm::make_early_inc_range(Range: predecessors(BB: CommonExitBlock))) { |
| 438 | if (Blocks.count(key: Pred)) |
| 439 | continue; |
| 440 | Pred->getTerminator()->replaceUsesOfWith(From: CommonExitBlock, To: NewExitBlock); |
| 441 | } |
| 442 | // Now add the old exit block to the outline region. |
| 443 | Blocks.insert(X: CommonExitBlock); |
| 444 | return CommonExitBlock; |
| 445 | } |
| 446 | |
| 447 | // Find the pair of life time markers for address 'Addr' that are either |
| 448 | // defined inside the outline region or can legally be shrinkwrapped into the |
| 449 | // outline region. If there are not other untracked uses of the address, return |
| 450 | // the pair of markers if found; otherwise return a pair of nullptr. |
| 451 | CodeExtractor::LifetimeMarkerInfo |
| 452 | CodeExtractor::(const CodeExtractorAnalysisCache &CEAC, |
| 453 | Instruction *Addr, |
| 454 | BasicBlock *ExitBlock) const { |
| 455 | LifetimeMarkerInfo Info; |
| 456 | |
| 457 | for (User *U : Addr->users()) { |
| 458 | IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(Val: U); |
| 459 | if (IntrInst) { |
| 460 | // We don't model addresses with multiple start/end markers, but the |
| 461 | // markers do not need to be in the region. |
| 462 | if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { |
| 463 | if (Info.LifeStart) |
| 464 | return {}; |
| 465 | Info.LifeStart = IntrInst; |
| 466 | continue; |
| 467 | } |
| 468 | if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { |
| 469 | if (Info.LifeEnd) |
| 470 | return {}; |
| 471 | Info.LifeEnd = IntrInst; |
| 472 | continue; |
| 473 | } |
| 474 | } |
| 475 | // Find untracked uses of the address, bail. |
| 476 | if (!definedInRegion(Blocks, V: U)) |
| 477 | return {}; |
| 478 | } |
| 479 | |
| 480 | if (!Info.LifeStart || !Info.LifeEnd) |
| 481 | return {}; |
| 482 | |
| 483 | Info.SinkLifeStart = !definedInRegion(Blocks, V: Info.LifeStart); |
| 484 | Info.HoistLifeEnd = !definedInRegion(Blocks, V: Info.LifeEnd); |
| 485 | // Do legality check. |
| 486 | if ((Info.SinkLifeStart || Info.HoistLifeEnd) && |
| 487 | !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr)) |
| 488 | return {}; |
| 489 | |
| 490 | // Check to see if we have a place to do hoisting, if not, bail. |
| 491 | if (Info.HoistLifeEnd && !ExitBlock) |
| 492 | return {}; |
| 493 | |
| 494 | return Info; |
| 495 | } |
| 496 | |
| 497 | void CodeExtractor::(const CodeExtractorAnalysisCache &CEAC, |
| 498 | ValueSet &SinkCands, ValueSet &HoistCands, |
| 499 | BasicBlock *&ExitBlock) const { |
| 500 | Function *Func = (*Blocks.begin())->getParent(); |
| 501 | ExitBlock = getCommonExitBlock(Blocks); |
| 502 | |
| 503 | auto moveOrIgnoreLifetimeMarkers = |
| 504 | [&](const LifetimeMarkerInfo &LMI) -> bool { |
| 505 | if (!LMI.LifeStart) |
| 506 | return false; |
| 507 | if (LMI.SinkLifeStart) { |
| 508 | LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart |
| 509 | << "\n" ); |
| 510 | SinkCands.insert(X: LMI.LifeStart); |
| 511 | } |
| 512 | if (LMI.HoistLifeEnd) { |
| 513 | LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n" ); |
| 514 | HoistCands.insert(X: LMI.LifeEnd); |
| 515 | } |
| 516 | return true; |
| 517 | }; |
| 518 | |
| 519 | // Look up allocas in the original function in CodeExtractorAnalysisCache, as |
| 520 | // this is much faster than walking all the instructions. |
| 521 | for (AllocaInst *AI : CEAC.getAllocas()) { |
| 522 | BasicBlock *BB = AI->getParent(); |
| 523 | if (Blocks.count(key: BB)) |
| 524 | continue; |
| 525 | |
| 526 | // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca, |
| 527 | // check whether it is actually still in the original function. |
| 528 | Function *AIFunc = BB->getParent(); |
| 529 | if (AIFunc != Func) |
| 530 | continue; |
| 531 | |
| 532 | LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, Addr: AI, ExitBlock); |
| 533 | bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); |
| 534 | if (Moved) { |
| 535 | LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n" ); |
| 536 | SinkCands.insert(X: AI); |
| 537 | continue; |
| 538 | } |
| 539 | |
| 540 | // Find bitcasts in the outlined region that have lifetime marker users |
| 541 | // outside that region. Replace the lifetime marker use with an |
| 542 | // outside region bitcast to avoid unnecessary alloca/reload instructions |
| 543 | // and extra lifetime markers. |
| 544 | SmallVector<Instruction *, 2> LifetimeBitcastUsers; |
| 545 | for (User *U : AI->users()) { |
| 546 | if (!definedInRegion(Blocks, V: U)) |
| 547 | continue; |
| 548 | |
| 549 | if (U->stripInBoundsConstantOffsets() != AI) |
| 550 | continue; |
| 551 | |
| 552 | Instruction *Bitcast = cast<Instruction>(Val: U); |
| 553 | for (User *BU : Bitcast->users()) { |
| 554 | auto *IntrInst = dyn_cast<LifetimeIntrinsic>(Val: BU); |
| 555 | if (!IntrInst) |
| 556 | continue; |
| 557 | |
| 558 | if (definedInRegion(Blocks, V: IntrInst)) |
| 559 | continue; |
| 560 | |
| 561 | LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast" |
| 562 | << *Bitcast << " in out-of-region lifetime marker " |
| 563 | << *IntrInst << "\n" ); |
| 564 | LifetimeBitcastUsers.push_back(Elt: IntrInst); |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | for (Instruction *I : LifetimeBitcastUsers) { |
| 569 | Module *M = AIFunc->getParent(); |
| 570 | LLVMContext &Ctx = M->getContext(); |
| 571 | auto *Int8PtrTy = PointerType::getUnqual(C&: Ctx); |
| 572 | CastInst *CastI = |
| 573 | CastInst::CreatePointerCast(S: AI, Ty: Int8PtrTy, Name: "lt.cast" , InsertBefore: I->getIterator()); |
| 574 | I->replaceUsesOfWith(From: I->getOperand(i: 1), To: CastI); |
| 575 | } |
| 576 | |
| 577 | // Follow any bitcasts. |
| 578 | SmallVector<Instruction *, 2> Bitcasts; |
| 579 | SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo; |
| 580 | for (User *U : AI->users()) { |
| 581 | if (U->stripInBoundsConstantOffsets() == AI) { |
| 582 | Instruction *Bitcast = cast<Instruction>(Val: U); |
| 583 | LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Addr: Bitcast, ExitBlock); |
| 584 | if (LMI.LifeStart) { |
| 585 | Bitcasts.push_back(Elt: Bitcast); |
| 586 | BitcastLifetimeInfo.push_back(Elt: LMI); |
| 587 | continue; |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | // Found unknown use of AI. |
| 592 | if (!definedInRegion(Blocks, V: U)) { |
| 593 | Bitcasts.clear(); |
| 594 | break; |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | // Either no bitcasts reference the alloca or there are unknown uses. |
| 599 | if (Bitcasts.empty()) |
| 600 | continue; |
| 601 | |
| 602 | LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n" ); |
| 603 | SinkCands.insert(X: AI); |
| 604 | for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { |
| 605 | Instruction *BitcastAddr = Bitcasts[I]; |
| 606 | const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; |
| 607 | assert(LMI.LifeStart && |
| 608 | "Unsafe to sink bitcast without lifetime markers" ); |
| 609 | moveOrIgnoreLifetimeMarkers(LMI); |
| 610 | if (!definedInRegion(Blocks, V: BitcastAddr)) { |
| 611 | LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr |
| 612 | << "\n" ); |
| 613 | SinkCands.insert(X: BitcastAddr); |
| 614 | } |
| 615 | } |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | bool CodeExtractor::() const { |
| 620 | if (Blocks.empty()) |
| 621 | return false; |
| 622 | BasicBlock * = *Blocks.begin(); |
| 623 | Function *F = Header->getParent(); |
| 624 | |
| 625 | // For functions with varargs, check that varargs handling is only done in the |
| 626 | // outlined function, i.e vastart and vaend are only used in outlined blocks. |
| 627 | if (AllowVarArgs && F->getFunctionType()->isVarArg()) { |
| 628 | auto containsVarArgIntrinsic = [](const Instruction &I) { |
| 629 | if (const CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
| 630 | if (const Function *Callee = CI->getCalledFunction()) |
| 631 | return Callee->getIntrinsicID() == Intrinsic::vastart || |
| 632 | Callee->getIntrinsicID() == Intrinsic::vaend; |
| 633 | return false; |
| 634 | }; |
| 635 | |
| 636 | for (auto &BB : *F) { |
| 637 | if (Blocks.count(key: &BB)) |
| 638 | continue; |
| 639 | if (llvm::any_of(Range&: BB, P: containsVarArgIntrinsic)) |
| 640 | return false; |
| 641 | } |
| 642 | } |
| 643 | // stacksave as input implies stackrestore in the outlined function. |
| 644 | // This can confuse prolog epilog insertion phase. |
| 645 | // stacksave's uses must not cross outlined function. |
| 646 | for (BasicBlock *BB : Blocks) { |
| 647 | for (Instruction &I : *BB) { |
| 648 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I); |
| 649 | if (!II) |
| 650 | continue; |
| 651 | bool IsSave = II->getIntrinsicID() == Intrinsic::stacksave; |
| 652 | bool IsRestore = II->getIntrinsicID() == Intrinsic::stackrestore; |
| 653 | if (IsSave && any_of(Range: II->users(), P: [&Blks = this->Blocks](User *U) { |
| 654 | return !definedInRegion(Blocks: Blks, V: U); |
| 655 | })) |
| 656 | return false; |
| 657 | if (IsRestore && !definedInRegion(Blocks, V: II->getArgOperand(i: 0))) |
| 658 | return false; |
| 659 | } |
| 660 | } |
| 661 | return true; |
| 662 | } |
| 663 | |
| 664 | void CodeExtractor::(ValueSet &Inputs, ValueSet &Outputs, |
| 665 | const ValueSet &SinkCands, |
| 666 | bool CollectGlobalInputs) const { |
| 667 | for (BasicBlock *BB : Blocks) { |
| 668 | // If a used value is defined outside the region, it's an input. If an |
| 669 | // instruction is used outside the region, it's an output. |
| 670 | for (Instruction &II : *BB) { |
| 671 | for (auto &OI : II.operands()) { |
| 672 | Value *V = OI; |
| 673 | if (!SinkCands.count(key: V) && |
| 674 | (definedInCaller(Blocks, V) || |
| 675 | (CollectGlobalInputs && llvm::isa<llvm::GlobalVariable>(Val: V)))) |
| 676 | Inputs.insert(X: V); |
| 677 | } |
| 678 | |
| 679 | for (User *U : II.users()) |
| 680 | if (!definedInRegion(Blocks, V: U)) { |
| 681 | Outputs.insert(X: &II); |
| 682 | break; |
| 683 | } |
| 684 | } |
| 685 | } |
| 686 | } |
| 687 | |
| 688 | /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside |
| 689 | /// of the region, we need to split the entry block of the region so that the |
| 690 | /// PHI node is easier to deal with. |
| 691 | void CodeExtractor::(BasicBlock *&) { |
| 692 | unsigned NumPredsFromRegion = 0; |
| 693 | unsigned NumPredsOutsideRegion = 0; |
| 694 | |
| 695 | if (Header != &Header->getParent()->getEntryBlock()) { |
| 696 | PHINode *PN = dyn_cast<PHINode>(Val: Header->begin()); |
| 697 | if (!PN) return; // No PHI nodes. |
| 698 | |
| 699 | // If the header node contains any PHI nodes, check to see if there is more |
| 700 | // than one entry from outside the region. If so, we need to sever the |
| 701 | // header block into two. |
| 702 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
| 703 | if (Blocks.count(key: PN->getIncomingBlock(i))) |
| 704 | ++NumPredsFromRegion; |
| 705 | else |
| 706 | ++NumPredsOutsideRegion; |
| 707 | |
| 708 | // If there is one (or fewer) predecessor from outside the region, we don't |
| 709 | // need to do anything special. |
| 710 | if (NumPredsOutsideRegion <= 1) return; |
| 711 | } |
| 712 | |
| 713 | // Otherwise, we need to split the header block into two pieces: one |
| 714 | // containing PHI nodes merging values from outside of the region, and a |
| 715 | // second that contains all of the code for the block and merges back any |
| 716 | // incoming values from inside of the region. |
| 717 | BasicBlock *NewBB = SplitBlock(Old: Header, SplitPt: Header->getFirstNonPHIIt(), DT); |
| 718 | |
| 719 | // We only want to code extract the second block now, and it becomes the new |
| 720 | // header of the region. |
| 721 | BasicBlock *OldPred = Header; |
| 722 | Blocks.remove(X: OldPred); |
| 723 | Blocks.insert(X: NewBB); |
| 724 | Header = NewBB; |
| 725 | |
| 726 | // Okay, now we need to adjust the PHI nodes and any branches from within the |
| 727 | // region to go to the new header block instead of the old header block. |
| 728 | if (NumPredsFromRegion) { |
| 729 | PHINode *PN = cast<PHINode>(Val: OldPred->begin()); |
| 730 | // Loop over all of the predecessors of OldPred that are in the region, |
| 731 | // changing them to branch to NewBB instead. |
| 732 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
| 733 | if (Blocks.count(key: PN->getIncomingBlock(i))) { |
| 734 | Instruction *TI = PN->getIncomingBlock(i)->getTerminator(); |
| 735 | TI->replaceUsesOfWith(From: OldPred, To: NewBB); |
| 736 | } |
| 737 | |
| 738 | // Okay, everything within the region is now branching to the right block, we |
| 739 | // just have to update the PHI nodes now, inserting PHI nodes into NewBB. |
| 740 | BasicBlock::iterator AfterPHIs; |
| 741 | for (AfterPHIs = OldPred->begin(); isa<PHINode>(Val: AfterPHIs); ++AfterPHIs) { |
| 742 | PHINode *PN = cast<PHINode>(Val&: AfterPHIs); |
| 743 | // Create a new PHI node in the new region, which has an incoming value |
| 744 | // from OldPred of PN. |
| 745 | PHINode *NewPN = PHINode::Create(Ty: PN->getType(), NumReservedValues: 1 + NumPredsFromRegion, |
| 746 | NameStr: PN->getName() + ".ce" ); |
| 747 | NewPN->insertBefore(InsertPos: NewBB->begin()); |
| 748 | PN->replaceAllUsesWith(V: NewPN); |
| 749 | NewPN->addIncoming(V: PN, BB: OldPred); |
| 750 | |
| 751 | // Loop over all of the incoming value in PN, moving them to NewPN if they |
| 752 | // are from the extracted region. |
| 753 | for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { |
| 754 | if (Blocks.count(key: PN->getIncomingBlock(i))) { |
| 755 | NewPN->addIncoming(V: PN->getIncomingValue(i), BB: PN->getIncomingBlock(i)); |
| 756 | PN->removeIncomingValue(Idx: i); |
| 757 | --i; |
| 758 | } |
| 759 | } |
| 760 | } |
| 761 | } |
| 762 | } |
| 763 | |
| 764 | /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from |
| 765 | /// outlined region, we split these PHIs on two: one with inputs from region |
| 766 | /// and other with remaining incoming blocks; then first PHIs are placed in |
| 767 | /// outlined region. |
| 768 | void CodeExtractor::() { |
| 769 | for (BasicBlock *ExitBB : ExtractedFuncRetVals) { |
| 770 | BasicBlock *NewBB = nullptr; |
| 771 | |
| 772 | for (PHINode &PN : ExitBB->phis()) { |
| 773 | // Find all incoming values from the outlining region. |
| 774 | SmallVector<unsigned, 2> IncomingVals; |
| 775 | for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) |
| 776 | if (Blocks.count(key: PN.getIncomingBlock(i))) |
| 777 | IncomingVals.push_back(Elt: i); |
| 778 | |
| 779 | // Do not process PHI if there is one (or fewer) predecessor from region. |
| 780 | // If PHI has exactly one predecessor from region, only this one incoming |
| 781 | // will be replaced on codeRepl block, so it should be safe to skip PHI. |
| 782 | if (IncomingVals.size() <= 1) |
| 783 | continue; |
| 784 | |
| 785 | // Create block for new PHIs and add it to the list of outlined if it |
| 786 | // wasn't done before. |
| 787 | if (!NewBB) { |
| 788 | NewBB = BasicBlock::Create(Context&: ExitBB->getContext(), |
| 789 | Name: ExitBB->getName() + ".split" , |
| 790 | Parent: ExitBB->getParent(), InsertBefore: ExitBB); |
| 791 | SmallVector<BasicBlock *, 4> Preds(predecessors(BB: ExitBB)); |
| 792 | for (BasicBlock *PredBB : Preds) |
| 793 | if (Blocks.count(key: PredBB)) |
| 794 | PredBB->getTerminator()->replaceUsesOfWith(From: ExitBB, To: NewBB); |
| 795 | BranchInst::Create(IfTrue: ExitBB, InsertBefore: NewBB); |
| 796 | Blocks.insert(X: NewBB); |
| 797 | } |
| 798 | |
| 799 | // Split this PHI. |
| 800 | PHINode *NewPN = PHINode::Create(Ty: PN.getType(), NumReservedValues: IncomingVals.size(), |
| 801 | NameStr: PN.getName() + ".ce" ); |
| 802 | NewPN->insertBefore(InsertPos: NewBB->getFirstNonPHIIt()); |
| 803 | for (unsigned i : IncomingVals) |
| 804 | NewPN->addIncoming(V: PN.getIncomingValue(i), BB: PN.getIncomingBlock(i)); |
| 805 | for (unsigned i : reverse(C&: IncomingVals)) |
| 806 | PN.removeIncomingValue(Idx: i, DeletePHIIfEmpty: false); |
| 807 | PN.addIncoming(V: NewPN, BB: NewBB); |
| 808 | } |
| 809 | } |
| 810 | } |
| 811 | |
| 812 | void CodeExtractor::() { |
| 813 | for (BasicBlock *Block : Blocks) |
| 814 | if (ReturnInst *RI = dyn_cast<ReturnInst>(Val: Block->getTerminator())) { |
| 815 | BasicBlock *New = |
| 816 | Block->splitBasicBlock(I: RI->getIterator(), BBName: Block->getName() + ".ret" ); |
| 817 | if (DT) { |
| 818 | // Old dominates New. New node dominates all other nodes dominated |
| 819 | // by Old. |
| 820 | DomTreeNode *OldNode = DT->getNode(BB: Block); |
| 821 | SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), |
| 822 | OldNode->end()); |
| 823 | |
| 824 | DomTreeNode *NewNode = DT->addNewBlock(BB: New, DomBB: Block); |
| 825 | |
| 826 | for (DomTreeNode *I : Children) |
| 827 | DT->changeImmediateDominator(N: I, NewIDom: NewNode); |
| 828 | } |
| 829 | } |
| 830 | } |
| 831 | |
| 832 | Function *CodeExtractor::( |
| 833 | const ValueSet &inputs, const ValueSet &outputs, BlockFrequency EntryFreq, |
| 834 | const Twine &Name, ValueSet &StructValues, StructType *&StructTy) { |
| 835 | LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n" ); |
| 836 | LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n" ); |
| 837 | |
| 838 | Function *oldFunction = Blocks.front()->getParent(); |
| 839 | Module *M = Blocks.front()->getModule(); |
| 840 | |
| 841 | // Assemble the function's parameter lists. |
| 842 | std::vector<Type *> ParamTy; |
| 843 | std::vector<Type *> AggParamTy; |
| 844 | const DataLayout &DL = M->getDataLayout(); |
| 845 | |
| 846 | // Add the types of the input values to the function's argument list |
| 847 | for (Value *value : inputs) { |
| 848 | LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n" ); |
| 849 | if (AggregateArgs && !ExcludeArgsFromAggregate.contains(key: value)) { |
| 850 | AggParamTy.push_back(x: value->getType()); |
| 851 | StructValues.insert(X: value); |
| 852 | } else |
| 853 | ParamTy.push_back(x: value->getType()); |
| 854 | } |
| 855 | |
| 856 | // Add the types of the output values to the function's argument list. |
| 857 | for (Value *output : outputs) { |
| 858 | LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n" ); |
| 859 | if (AggregateArgs && !ExcludeArgsFromAggregate.contains(key: output)) { |
| 860 | AggParamTy.push_back(x: output->getType()); |
| 861 | StructValues.insert(X: output); |
| 862 | } else |
| 863 | ParamTy.push_back( |
| 864 | x: PointerType::get(C&: output->getContext(), AddressSpace: DL.getAllocaAddrSpace())); |
| 865 | } |
| 866 | |
| 867 | assert( |
| 868 | (ParamTy.size() + AggParamTy.size()) == |
| 869 | (inputs.size() + outputs.size()) && |
| 870 | "Number of scalar and aggregate params does not match inputs, outputs" ); |
| 871 | assert((StructValues.empty() || AggregateArgs) && |
| 872 | "Expeced StructValues only with AggregateArgs set" ); |
| 873 | |
| 874 | // Concatenate scalar and aggregate params in ParamTy. |
| 875 | if (!AggParamTy.empty()) { |
| 876 | StructTy = StructType::get(Context&: M->getContext(), Elements: AggParamTy); |
| 877 | ParamTy.push_back(x: PointerType::get( |
| 878 | C&: M->getContext(), AddressSpace: ArgsInZeroAddressSpace ? 0 : DL.getAllocaAddrSpace())); |
| 879 | } |
| 880 | |
| 881 | Type *RetTy = getSwitchType(); |
| 882 | LLVM_DEBUG({ |
| 883 | dbgs() << "Function type: " << *RetTy << " f(" ; |
| 884 | for (Type *i : ParamTy) |
| 885 | dbgs() << *i << ", " ; |
| 886 | dbgs() << ")\n" ; |
| 887 | }); |
| 888 | |
| 889 | FunctionType *funcType = FunctionType::get( |
| 890 | Result: RetTy, Params: ParamTy, isVarArg: AllowVarArgs && oldFunction->isVarArg()); |
| 891 | |
| 892 | // Create the new function |
| 893 | Function *newFunction = |
| 894 | Function::Create(Ty: funcType, Linkage: GlobalValue::InternalLinkage, |
| 895 | AddrSpace: oldFunction->getAddressSpace(), N: Name, M); |
| 896 | |
| 897 | // Propagate personality info to the new function if there is one. |
| 898 | if (oldFunction->hasPersonalityFn()) |
| 899 | newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); |
| 900 | |
| 901 | // Inherit all of the target dependent attributes and white-listed |
| 902 | // target independent attributes. |
| 903 | // (e.g. If the extracted region contains a call to an x86.sse |
| 904 | // instruction we need to make sure that the extracted region has the |
| 905 | // "target-features" attribute allowing it to be lowered. |
| 906 | // FIXME: This should be changed to check to see if a specific |
| 907 | // attribute can not be inherited. |
| 908 | for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) { |
| 909 | if (Attr.isStringAttribute()) { |
| 910 | if (Attr.getKindAsString() == "thunk" ) |
| 911 | continue; |
| 912 | } else |
| 913 | switch (Attr.getKindAsEnum()) { |
| 914 | // Those attributes cannot be propagated safely. Explicitly list them |
| 915 | // here so we get a warning if new attributes are added. |
| 916 | case Attribute::AllocSize: |
| 917 | case Attribute::Builtin: |
| 918 | case Attribute::Convergent: |
| 919 | case Attribute::JumpTable: |
| 920 | case Attribute::Naked: |
| 921 | case Attribute::NoBuiltin: |
| 922 | case Attribute::NoMerge: |
| 923 | case Attribute::NoReturn: |
| 924 | case Attribute::NoSync: |
| 925 | case Attribute::ReturnsTwice: |
| 926 | case Attribute::Speculatable: |
| 927 | case Attribute::StackAlignment: |
| 928 | case Attribute::WillReturn: |
| 929 | case Attribute::AllocKind: |
| 930 | case Attribute::PresplitCoroutine: |
| 931 | case Attribute::Memory: |
| 932 | case Attribute::NoFPClass: |
| 933 | case Attribute::CoroDestroyOnlyWhenComplete: |
| 934 | case Attribute::CoroElideSafe: |
| 935 | case Attribute::NoDivergenceSource: |
| 936 | continue; |
| 937 | // Those attributes should be safe to propagate to the extracted function. |
| 938 | case Attribute::AlwaysInline: |
| 939 | case Attribute::Cold: |
| 940 | case Attribute::DisableSanitizerInstrumentation: |
| 941 | case Attribute::FnRetThunkExtern: |
| 942 | case Attribute::Hot: |
| 943 | case Attribute::HybridPatchable: |
| 944 | case Attribute::NoRecurse: |
| 945 | case Attribute::InlineHint: |
| 946 | case Attribute::MinSize: |
| 947 | case Attribute::NoCallback: |
| 948 | case Attribute::NoDuplicate: |
| 949 | case Attribute::NoFree: |
| 950 | case Attribute::NoImplicitFloat: |
| 951 | case Attribute::NoInline: |
| 952 | case Attribute::NonLazyBind: |
| 953 | case Attribute::NoRedZone: |
| 954 | case Attribute::NoUnwind: |
| 955 | case Attribute::NoSanitizeBounds: |
| 956 | case Attribute::NoSanitizeCoverage: |
| 957 | case Attribute::NullPointerIsValid: |
| 958 | case Attribute::OptimizeForDebugging: |
| 959 | case Attribute::OptForFuzzing: |
| 960 | case Attribute::OptimizeNone: |
| 961 | case Attribute::OptimizeForSize: |
| 962 | case Attribute::SafeStack: |
| 963 | case Attribute::ShadowCallStack: |
| 964 | case Attribute::SanitizeAddress: |
| 965 | case Attribute::SanitizeMemory: |
| 966 | case Attribute::SanitizeNumericalStability: |
| 967 | case Attribute::SanitizeThread: |
| 968 | case Attribute::SanitizeType: |
| 969 | case Attribute::SanitizeHWAddress: |
| 970 | case Attribute::SanitizeMemTag: |
| 971 | case Attribute::SanitizeRealtime: |
| 972 | case Attribute::SanitizeRealtimeBlocking: |
| 973 | case Attribute::SpeculativeLoadHardening: |
| 974 | case Attribute::StackProtect: |
| 975 | case Attribute::StackProtectReq: |
| 976 | case Attribute::StackProtectStrong: |
| 977 | case Attribute::StrictFP: |
| 978 | case Attribute::UWTable: |
| 979 | case Attribute::VScaleRange: |
| 980 | case Attribute::NoCfCheck: |
| 981 | case Attribute::MustProgress: |
| 982 | case Attribute::NoProfile: |
| 983 | case Attribute::SkipProfile: |
| 984 | break; |
| 985 | // These attributes cannot be applied to functions. |
| 986 | case Attribute::Alignment: |
| 987 | case Attribute::AllocatedPointer: |
| 988 | case Attribute::AllocAlign: |
| 989 | case Attribute::ByVal: |
| 990 | case Attribute::Captures: |
| 991 | case Attribute::Dereferenceable: |
| 992 | case Attribute::DereferenceableOrNull: |
| 993 | case Attribute::ElementType: |
| 994 | case Attribute::InAlloca: |
| 995 | case Attribute::InReg: |
| 996 | case Attribute::Nest: |
| 997 | case Attribute::NoAlias: |
| 998 | case Attribute::NoUndef: |
| 999 | case Attribute::NonNull: |
| 1000 | case Attribute::Preallocated: |
| 1001 | case Attribute::ReadNone: |
| 1002 | case Attribute::ReadOnly: |
| 1003 | case Attribute::Returned: |
| 1004 | case Attribute::SExt: |
| 1005 | case Attribute::StructRet: |
| 1006 | case Attribute::SwiftError: |
| 1007 | case Attribute::SwiftSelf: |
| 1008 | case Attribute::SwiftAsync: |
| 1009 | case Attribute::ZExt: |
| 1010 | case Attribute::ImmArg: |
| 1011 | case Attribute::ByRef: |
| 1012 | case Attribute::WriteOnly: |
| 1013 | case Attribute::Writable: |
| 1014 | case Attribute::DeadOnUnwind: |
| 1015 | case Attribute::Range: |
| 1016 | case Attribute::Initializes: |
| 1017 | case Attribute::NoExt: |
| 1018 | // These are not really attributes. |
| 1019 | case Attribute::None: |
| 1020 | case Attribute::EndAttrKinds: |
| 1021 | case Attribute::EmptyKey: |
| 1022 | case Attribute::TombstoneKey: |
| 1023 | case Attribute::DeadOnReturn: |
| 1024 | llvm_unreachable("Not a function attribute" ); |
| 1025 | } |
| 1026 | |
| 1027 | newFunction->addFnAttr(Attr); |
| 1028 | } |
| 1029 | |
| 1030 | // Create scalar and aggregate iterators to name all of the arguments we |
| 1031 | // inserted. |
| 1032 | Function::arg_iterator ScalarAI = newFunction->arg_begin(); |
| 1033 | |
| 1034 | // Set names and attributes for input and output arguments. |
| 1035 | ScalarAI = newFunction->arg_begin(); |
| 1036 | for (Value *input : inputs) { |
| 1037 | if (StructValues.contains(key: input)) |
| 1038 | continue; |
| 1039 | |
| 1040 | ScalarAI->setName(input->getName()); |
| 1041 | if (input->isSwiftError()) |
| 1042 | newFunction->addParamAttr(ArgNo: ScalarAI - newFunction->arg_begin(), |
| 1043 | Kind: Attribute::SwiftError); |
| 1044 | ++ScalarAI; |
| 1045 | } |
| 1046 | for (Value *output : outputs) { |
| 1047 | if (StructValues.contains(key: output)) |
| 1048 | continue; |
| 1049 | |
| 1050 | ScalarAI->setName(output->getName() + ".out" ); |
| 1051 | ++ScalarAI; |
| 1052 | } |
| 1053 | |
| 1054 | // Update the entry count of the function. |
| 1055 | if (BFI) { |
| 1056 | auto Count = BFI->getProfileCountFromFreq(Freq: EntryFreq); |
| 1057 | if (Count.has_value()) |
| 1058 | newFunction->setEntryCount( |
| 1059 | Count: ProfileCount(*Count, Function::PCT_Real)); // FIXME |
| 1060 | } |
| 1061 | |
| 1062 | return newFunction; |
| 1063 | } |
| 1064 | |
| 1065 | /// If the original function has debug info, we have to add a debug location |
| 1066 | /// to the new branch instruction from the artificial entry block. |
| 1067 | /// We use the debug location of the first instruction in the extracted |
| 1068 | /// blocks, as there is no other equivalent line in the source code. |
| 1069 | static void applyFirstDebugLoc(Function *oldFunction, |
| 1070 | ArrayRef<BasicBlock *> Blocks, |
| 1071 | Instruction *BranchI) { |
| 1072 | if (oldFunction->getSubprogram()) { |
| 1073 | any_of(Range&: Blocks, P: [&BranchI](const BasicBlock *BB) { |
| 1074 | return any_of(Range: *BB, P: [&BranchI](const Instruction &I) { |
| 1075 | if (!I.getDebugLoc()) |
| 1076 | return false; |
| 1077 | BranchI->setDebugLoc(I.getDebugLoc()); |
| 1078 | return true; |
| 1079 | }); |
| 1080 | }); |
| 1081 | } |
| 1082 | } |
| 1083 | |
| 1084 | /// Erase lifetime.start markers which reference inputs to the extraction |
| 1085 | /// region, and insert the referenced memory into \p LifetimesStart. |
| 1086 | /// |
| 1087 | /// The extraction region is defined by a set of blocks (\p Blocks), and a set |
| 1088 | /// of allocas which will be moved from the caller function into the extracted |
| 1089 | /// function (\p SunkAllocas). |
| 1090 | static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks, |
| 1091 | const SetVector<Value *> &SunkAllocas, |
| 1092 | SetVector<Value *> &LifetimesStart) { |
| 1093 | for (BasicBlock *BB : Blocks) { |
| 1094 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
| 1095 | auto *II = dyn_cast<LifetimeIntrinsic>(Val: &I); |
| 1096 | if (!II) |
| 1097 | continue; |
| 1098 | |
| 1099 | // Get the memory operand of the lifetime marker. If the underlying |
| 1100 | // object is a sunk alloca, or is otherwise defined in the extraction |
| 1101 | // region, the lifetime marker must not be erased. |
| 1102 | Value *Mem = II->getOperand(i_nocapture: 1)->stripInBoundsOffsets(); |
| 1103 | if (SunkAllocas.count(key: Mem) || definedInRegion(Blocks, V: Mem)) |
| 1104 | continue; |
| 1105 | |
| 1106 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) |
| 1107 | LifetimesStart.insert(X: Mem); |
| 1108 | II->eraseFromParent(); |
| 1109 | } |
| 1110 | } |
| 1111 | } |
| 1112 | |
| 1113 | /// Insert lifetime start/end markers surrounding the call to the new function |
| 1114 | /// for objects defined in the caller. |
| 1115 | static void ( |
| 1116 | Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd, |
| 1117 | CallInst *TheCall) { |
| 1118 | LLVMContext &Ctx = M->getContext(); |
| 1119 | auto NegativeOne = ConstantInt::getSigned(Ty: Type::getInt64Ty(C&: Ctx), V: -1); |
| 1120 | Instruction *Term = TheCall->getParent()->getTerminator(); |
| 1121 | |
| 1122 | // Emit lifetime markers for the pointers given in \p Objects. Insert the |
| 1123 | // markers before the call if \p InsertBefore, and after the call otherwise. |
| 1124 | auto insertMarkers = [&](Intrinsic::ID MarkerFunc, ArrayRef<Value *> Objects, |
| 1125 | bool InsertBefore) { |
| 1126 | for (Value *Mem : Objects) { |
| 1127 | assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() == |
| 1128 | TheCall->getFunction()) && |
| 1129 | "Input memory not defined in original function" ); |
| 1130 | |
| 1131 | Function *Func = |
| 1132 | Intrinsic::getOrInsertDeclaration(M, id: MarkerFunc, Tys: Mem->getType()); |
| 1133 | auto Marker = CallInst::Create(Func, Args: {NegativeOne, Mem}); |
| 1134 | if (InsertBefore) |
| 1135 | Marker->insertBefore(InsertPos: TheCall->getIterator()); |
| 1136 | else |
| 1137 | Marker->insertBefore(InsertPos: Term->getIterator()); |
| 1138 | } |
| 1139 | }; |
| 1140 | |
| 1141 | if (!LifetimesStart.empty()) { |
| 1142 | insertMarkers(Intrinsic::lifetime_start, LifetimesStart, |
| 1143 | /*InsertBefore=*/true); |
| 1144 | } |
| 1145 | |
| 1146 | if (!LifetimesEnd.empty()) { |
| 1147 | insertMarkers(Intrinsic::lifetime_end, LifetimesEnd, |
| 1148 | /*InsertBefore=*/false); |
| 1149 | } |
| 1150 | } |
| 1151 | |
| 1152 | void CodeExtractor::(Function *newFunction) { |
| 1153 | auto newFuncIt = newFunction->begin(); |
| 1154 | for (BasicBlock *Block : Blocks) { |
| 1155 | // Delete the basic block from the old function, and the list of blocks |
| 1156 | Block->removeFromParent(); |
| 1157 | |
| 1158 | // Insert this basic block into the new function |
| 1159 | // Insert the original blocks after the entry block created |
| 1160 | // for the new function. The entry block may be followed |
| 1161 | // by a set of exit blocks at this point, but these exit |
| 1162 | // blocks better be placed at the end of the new function. |
| 1163 | newFuncIt = newFunction->insert(Position: std::next(x: newFuncIt), BB: Block); |
| 1164 | } |
| 1165 | } |
| 1166 | |
| 1167 | void CodeExtractor::( |
| 1168 | BasicBlock *CodeReplacer, |
| 1169 | const DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, |
| 1170 | BranchProbabilityInfo *BPI) { |
| 1171 | using Distribution = BlockFrequencyInfoImplBase::Distribution; |
| 1172 | using BlockNode = BlockFrequencyInfoImplBase::BlockNode; |
| 1173 | |
| 1174 | // Update the branch weights for the exit block. |
| 1175 | Instruction *TI = CodeReplacer->getTerminator(); |
| 1176 | SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0); |
| 1177 | |
| 1178 | // Block Frequency distribution with dummy node. |
| 1179 | Distribution BranchDist; |
| 1180 | |
| 1181 | SmallVector<BranchProbability, 4> EdgeProbabilities( |
| 1182 | TI->getNumSuccessors(), BranchProbability::getUnknown()); |
| 1183 | |
| 1184 | // Add each of the frequencies of the successors. |
| 1185 | for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { |
| 1186 | BlockNode ExitNode(i); |
| 1187 | uint64_t ExitFreq = ExitWeights.lookup(Val: TI->getSuccessor(Idx: i)).getFrequency(); |
| 1188 | if (ExitFreq != 0) |
| 1189 | BranchDist.addExit(Node: ExitNode, Amount: ExitFreq); |
| 1190 | else |
| 1191 | EdgeProbabilities[i] = BranchProbability::getZero(); |
| 1192 | } |
| 1193 | |
| 1194 | // Check for no total weight. |
| 1195 | if (BranchDist.Total == 0) { |
| 1196 | BPI->setEdgeProbability(Src: CodeReplacer, Probs: EdgeProbabilities); |
| 1197 | return; |
| 1198 | } |
| 1199 | |
| 1200 | // Normalize the distribution so that they can fit in unsigned. |
| 1201 | BranchDist.normalize(); |
| 1202 | |
| 1203 | // Create normalized branch weights and set the metadata. |
| 1204 | for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { |
| 1205 | const auto &Weight = BranchDist.Weights[I]; |
| 1206 | |
| 1207 | // Get the weight and update the current BFI. |
| 1208 | BranchWeights[Weight.TargetNode.Index] = Weight.Amount; |
| 1209 | BranchProbability BP(Weight.Amount, BranchDist.Total); |
| 1210 | EdgeProbabilities[Weight.TargetNode.Index] = BP; |
| 1211 | } |
| 1212 | BPI->setEdgeProbability(Src: CodeReplacer, Probs: EdgeProbabilities); |
| 1213 | TI->setMetadata( |
| 1214 | KindID: LLVMContext::MD_prof, |
| 1215 | Node: MDBuilder(TI->getContext()).createBranchWeights(Weights: BranchWeights)); |
| 1216 | } |
| 1217 | |
| 1218 | /// Erase debug info intrinsics which refer to values in \p F but aren't in |
| 1219 | /// \p F. |
| 1220 | static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { |
| 1221 | for (Instruction &I : instructions(F)) { |
| 1222 | SmallVector<DbgVariableIntrinsic *, 4> DbgUsers; |
| 1223 | SmallVector<DbgVariableRecord *, 4> DbgVariableRecords; |
| 1224 | findDbgUsers(DbgInsts&: DbgUsers, V: &I, DbgVariableRecords: &DbgVariableRecords); |
| 1225 | for (DbgVariableIntrinsic *DVI : DbgUsers) |
| 1226 | if (DVI->getFunction() != &F) |
| 1227 | DVI->eraseFromParent(); |
| 1228 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
| 1229 | if (DVR->getFunction() != &F) |
| 1230 | DVR->eraseFromParent(); |
| 1231 | } |
| 1232 | } |
| 1233 | |
| 1234 | /// Fix up the debug info in the old and new functions. Following changes are |
| 1235 | /// done. |
| 1236 | /// 1. If a debug record points to a value that has been replaced, update the |
| 1237 | /// record to use the new value. |
| 1238 | /// 2. If an Input value that has been replaced was used as a location of a |
| 1239 | /// debug record in the Parent function, then materealize a similar record in |
| 1240 | /// the new function. |
| 1241 | /// 3. Point line locations and debug intrinsics to the new subprogram scope |
| 1242 | /// 4. Remove intrinsics which point to values outside of the new function. |
| 1243 | static void (Function &OldFunc, Function &NewFunc, |
| 1244 | CallInst &TheCall, |
| 1245 | const SetVector<Value *> &Inputs, |
| 1246 | ArrayRef<Value *> NewValues) { |
| 1247 | DISubprogram *OldSP = OldFunc.getSubprogram(); |
| 1248 | LLVMContext &Ctx = OldFunc.getContext(); |
| 1249 | |
| 1250 | if (!OldSP) { |
| 1251 | // Erase any debug info the new function contains. |
| 1252 | stripDebugInfo(F&: NewFunc); |
| 1253 | // Make sure the old function doesn't contain any non-local metadata refs. |
| 1254 | eraseDebugIntrinsicsWithNonLocalRefs(F&: NewFunc); |
| 1255 | return; |
| 1256 | } |
| 1257 | |
| 1258 | // Create a subprogram for the new function. Leave out a description of the |
| 1259 | // function arguments, as the parameters don't correspond to anything at the |
| 1260 | // source level. |
| 1261 | assert(OldSP->getUnit() && "Missing compile unit for subprogram" ); |
| 1262 | DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, |
| 1263 | OldSP->getUnit()); |
| 1264 | auto SPType = DIB.createSubroutineType(ParameterTypes: DIB.getOrCreateTypeArray(Elements: {})); |
| 1265 | DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | |
| 1266 | DISubprogram::SPFlagOptimized | |
| 1267 | DISubprogram::SPFlagLocalToUnit; |
| 1268 | auto NewSP = DIB.createFunction( |
| 1269 | Scope: OldSP->getUnit(), Name: NewFunc.getName(), LinkageName: NewFunc.getName(), File: OldSP->getFile(), |
| 1270 | /*LineNo=*/0, Ty: SPType, /*ScopeLine=*/0, Flags: DINode::FlagZero, SPFlags); |
| 1271 | NewFunc.setSubprogram(NewSP); |
| 1272 | |
| 1273 | auto UpdateOrInsertDebugRecord = [&](auto *DR, Value *OldLoc, Value *NewLoc, |
| 1274 | DIExpression *Expr, bool Declare) { |
| 1275 | if (DR->getParent()->getParent() == &NewFunc) { |
| 1276 | DR->replaceVariableLocationOp(OldLoc, NewLoc); |
| 1277 | return; |
| 1278 | } |
| 1279 | if (Declare) { |
| 1280 | DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(), |
| 1281 | &NewFunc.getEntryBlock()); |
| 1282 | return; |
| 1283 | } |
| 1284 | DIB.insertDbgValueIntrinsic( |
| 1285 | Val: NewLoc, VarInfo: DR->getVariable(), Expr, DL: DR->getDebugLoc(), |
| 1286 | InsertPt: NewFunc.getEntryBlock().getTerminator()->getIterator()); |
| 1287 | }; |
| 1288 | for (auto [Input, NewVal] : zip_equal(t: Inputs, u&: NewValues)) { |
| 1289 | SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; |
| 1290 | SmallVector<DbgVariableRecord *, 1> DPUsers; |
| 1291 | findDbgUsers(DbgInsts&: DbgUsers, V: Input, DbgVariableRecords: &DPUsers); |
| 1292 | DIExpression *Expr = DIB.createExpression(); |
| 1293 | |
| 1294 | // Iterate the debud users of the Input values. If they are in the extracted |
| 1295 | // function then update their location with the new value. If they are in |
| 1296 | // the parent function then create a similar debug record. |
| 1297 | for (auto *DVI : DbgUsers) |
| 1298 | UpdateOrInsertDebugRecord(DVI, Input, NewVal, Expr, |
| 1299 | isa<DbgDeclareInst>(Val: DVI)); |
| 1300 | for (auto *DVR : DPUsers) |
| 1301 | UpdateOrInsertDebugRecord(DVR, Input, NewVal, Expr, DVR->isDbgDeclare()); |
| 1302 | } |
| 1303 | |
| 1304 | auto IsInvalidLocation = [&NewFunc](Value *Location) { |
| 1305 | // Location is invalid if it isn't a constant, an instruction or an |
| 1306 | // argument, or is an instruction/argument but isn't in the new function. |
| 1307 | if (!Location || (!isa<Constant>(Val: Location) && !isa<Argument>(Val: Location) && |
| 1308 | !isa<Instruction>(Val: Location))) |
| 1309 | return true; |
| 1310 | |
| 1311 | if (Argument *Arg = dyn_cast<Argument>(Val: Location)) |
| 1312 | return Arg->getParent() != &NewFunc; |
| 1313 | if (Instruction *LocationInst = dyn_cast<Instruction>(Val: Location)) |
| 1314 | return LocationInst->getFunction() != &NewFunc; |
| 1315 | return false; |
| 1316 | }; |
| 1317 | |
| 1318 | // Debug intrinsics in the new function need to be updated in one of two |
| 1319 | // ways: |
| 1320 | // 1) They need to be deleted, because they describe a value in the old |
| 1321 | // function. |
| 1322 | // 2) They need to point to fresh metadata, e.g. because they currently |
| 1323 | // point to a variable in the wrong scope. |
| 1324 | SmallDenseMap<DINode *, DINode *> RemappedMetadata; |
| 1325 | SmallVector<DbgVariableRecord *, 4> DVRsToDelete; |
| 1326 | DenseMap<const MDNode *, MDNode *> Cache; |
| 1327 | |
| 1328 | auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar) { |
| 1329 | DINode *&NewVar = RemappedMetadata[OldVar]; |
| 1330 | if (!NewVar) { |
| 1331 | DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( |
| 1332 | RootScope&: *OldVar->getScope(), NewSP&: *NewSP, Ctx, Cache); |
| 1333 | NewVar = DIB.createAutoVariable( |
| 1334 | Scope: NewScope, Name: OldVar->getName(), File: OldVar->getFile(), LineNo: OldVar->getLine(), |
| 1335 | Ty: OldVar->getType(), /*AlwaysPreserve=*/false, Flags: DINode::FlagZero, |
| 1336 | AlignInBits: OldVar->getAlignInBits()); |
| 1337 | } |
| 1338 | return cast<DILocalVariable>(Val: NewVar); |
| 1339 | }; |
| 1340 | |
| 1341 | auto UpdateDbgLabel = [&](auto *LabelRecord) { |
| 1342 | // Point the label record to a fresh label within the new function if |
| 1343 | // the record was not inlined from some other function. |
| 1344 | if (LabelRecord->getDebugLoc().getInlinedAt()) |
| 1345 | return; |
| 1346 | DILabel *OldLabel = LabelRecord->getLabel(); |
| 1347 | DINode *&NewLabel = RemappedMetadata[OldLabel]; |
| 1348 | if (!NewLabel) { |
| 1349 | DILocalScope *NewScope = DILocalScope::cloneScopeForSubprogram( |
| 1350 | RootScope&: *OldLabel->getScope(), NewSP&: *NewSP, Ctx, Cache); |
| 1351 | NewLabel = |
| 1352 | DILabel::get(Context&: Ctx, Scope: NewScope, Name: OldLabel->getName(), File: OldLabel->getFile(), |
| 1353 | Line: OldLabel->getLine(), Column: OldLabel->getColumn(), |
| 1354 | IsArtificial: OldLabel->isArtificial(), CoroSuspendIdx: OldLabel->getCoroSuspendIdx()); |
| 1355 | } |
| 1356 | LabelRecord->setLabel(cast<DILabel>(Val: NewLabel)); |
| 1357 | }; |
| 1358 | |
| 1359 | auto UpdateDbgRecordsOnInst = [&](Instruction &I) -> void { |
| 1360 | for (DbgRecord &DR : I.getDbgRecordRange()) { |
| 1361 | if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(Val: &DR)) { |
| 1362 | UpdateDbgLabel(DLR); |
| 1363 | continue; |
| 1364 | } |
| 1365 | |
| 1366 | DbgVariableRecord &DVR = cast<DbgVariableRecord>(Val&: DR); |
| 1367 | // If any of the used locations are invalid, delete the record. |
| 1368 | if (any_of(Range: DVR.location_ops(), P: IsInvalidLocation)) { |
| 1369 | DVRsToDelete.push_back(Elt: &DVR); |
| 1370 | continue; |
| 1371 | } |
| 1372 | |
| 1373 | // DbgAssign intrinsics have an extra Value argument: |
| 1374 | if (DVR.isDbgAssign() && IsInvalidLocation(DVR.getAddress())) { |
| 1375 | DVRsToDelete.push_back(Elt: &DVR); |
| 1376 | continue; |
| 1377 | } |
| 1378 | |
| 1379 | // If the variable was in the scope of the old function, i.e. it was not |
| 1380 | // inlined, point the intrinsic to a fresh variable within the new |
| 1381 | // function. |
| 1382 | if (!DVR.getDebugLoc().getInlinedAt()) |
| 1383 | DVR.setVariable(GetUpdatedDIVariable(DVR.getVariable())); |
| 1384 | } |
| 1385 | }; |
| 1386 | |
| 1387 | for (Instruction &I : instructions(F&: NewFunc)) |
| 1388 | UpdateDbgRecordsOnInst(I); |
| 1389 | |
| 1390 | for (auto *DVR : DVRsToDelete) |
| 1391 | DVR->getMarker()->MarkedInstr->dropOneDbgRecord(I: DVR); |
| 1392 | DIB.finalizeSubprogram(SP: NewSP); |
| 1393 | |
| 1394 | // Fix up the scope information attached to the line locations and the |
| 1395 | // debug assignment metadata in the new function. |
| 1396 | DenseMap<DIAssignID *, DIAssignID *> AssignmentIDMap; |
| 1397 | for (Instruction &I : instructions(F&: NewFunc)) { |
| 1398 | if (const DebugLoc &DL = I.getDebugLoc()) |
| 1399 | I.setDebugLoc( |
| 1400 | DebugLoc::replaceInlinedAtSubprogram(DL, NewSP&: *NewSP, Ctx, Cache)); |
| 1401 | for (DbgRecord &DR : I.getDbgRecordRange()) |
| 1402 | DR.setDebugLoc(DebugLoc::replaceInlinedAtSubprogram(DL: DR.getDebugLoc(), |
| 1403 | NewSP&: *NewSP, Ctx, Cache)); |
| 1404 | |
| 1405 | // Loop info metadata may contain line locations. Fix them up. |
| 1406 | auto updateLoopInfoLoc = [&Ctx, &Cache, NewSP](Metadata *MD) -> Metadata * { |
| 1407 | if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD)) |
| 1408 | return DebugLoc::replaceInlinedAtSubprogram(DL: Loc, NewSP&: *NewSP, Ctx, Cache); |
| 1409 | return MD; |
| 1410 | }; |
| 1411 | updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc); |
| 1412 | at::remapAssignID(Map&: AssignmentIDMap, I); |
| 1413 | } |
| 1414 | if (!TheCall.getDebugLoc()) |
| 1415 | TheCall.setDebugLoc(DILocation::get(Context&: Ctx, Line: 0, Column: 0, Scope: OldSP)); |
| 1416 | |
| 1417 | eraseDebugIntrinsicsWithNonLocalRefs(F&: NewFunc); |
| 1418 | } |
| 1419 | |
| 1420 | Function * |
| 1421 | CodeExtractor::(const CodeExtractorAnalysisCache &CEAC) { |
| 1422 | ValueSet Inputs, Outputs; |
| 1423 | return extractCodeRegion(CEAC, Inputs, Outputs); |
| 1424 | } |
| 1425 | |
| 1426 | Function * |
| 1427 | CodeExtractor::(const CodeExtractorAnalysisCache &CEAC, |
| 1428 | ValueSet &inputs, ValueSet &outputs) { |
| 1429 | if (!isEligible()) |
| 1430 | return nullptr; |
| 1431 | |
| 1432 | // Assumption: this is a single-entry code region, and the header is the first |
| 1433 | // block in the region. |
| 1434 | BasicBlock * = *Blocks.begin(); |
| 1435 | Function *oldFunction = header->getParent(); |
| 1436 | |
| 1437 | normalizeCFGForExtraction(header); |
| 1438 | |
| 1439 | // Remove @llvm.assume calls that will be moved to the new function from the |
| 1440 | // old function's assumption cache. |
| 1441 | for (BasicBlock *Block : Blocks) { |
| 1442 | for (Instruction &I : llvm::make_early_inc_range(Range&: *Block)) { |
| 1443 | if (auto *AI = dyn_cast<AssumeInst>(Val: &I)) { |
| 1444 | if (AC) |
| 1445 | AC->unregisterAssumption(CI: AI); |
| 1446 | AI->eraseFromParent(); |
| 1447 | } |
| 1448 | } |
| 1449 | } |
| 1450 | |
| 1451 | ValueSet SinkingCands, HoistingCands; |
| 1452 | BasicBlock *CommonExit = nullptr; |
| 1453 | findAllocas(CEAC, SinkCands&: SinkingCands, HoistCands&: HoistingCands, ExitBlock&: CommonExit); |
| 1454 | assert(HoistingCands.empty() || CommonExit); |
| 1455 | |
| 1456 | // Find inputs to, outputs from the code region. |
| 1457 | findInputsOutputs(Inputs&: inputs, Outputs&: outputs, SinkCands: SinkingCands); |
| 1458 | |
| 1459 | // Collect objects which are inputs to the extraction region and also |
| 1460 | // referenced by lifetime start markers within it. The effects of these |
| 1461 | // markers must be replicated in the calling function to prevent the stack |
| 1462 | // coloring pass from merging slots which store input objects. |
| 1463 | ValueSet LifetimesStart; |
| 1464 | eraseLifetimeMarkersOnInputs(Blocks, SunkAllocas: SinkingCands, LifetimesStart); |
| 1465 | |
| 1466 | if (!HoistingCands.empty()) { |
| 1467 | auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExitBlock: CommonExit); |
| 1468 | Instruction *TI = HoistToBlock->getTerminator(); |
| 1469 | for (auto *II : HoistingCands) |
| 1470 | cast<Instruction>(Val: II)->moveBefore(InsertPos: TI->getIterator()); |
| 1471 | computeExtractedFuncRetVals(); |
| 1472 | } |
| 1473 | |
| 1474 | // CFG/ExitBlocks must not change hereafter |
| 1475 | |
| 1476 | // Calculate the entry frequency of the new function before we change the root |
| 1477 | // block. |
| 1478 | BlockFrequency EntryFreq; |
| 1479 | DenseMap<BasicBlock *, BlockFrequency> ExitWeights; |
| 1480 | if (BFI) { |
| 1481 | assert(BPI && "Both BPI and BFI are required to preserve profile info" ); |
| 1482 | for (BasicBlock *Pred : predecessors(BB: header)) { |
| 1483 | if (Blocks.count(key: Pred)) |
| 1484 | continue; |
| 1485 | EntryFreq += |
| 1486 | BFI->getBlockFreq(BB: Pred) * BPI->getEdgeProbability(Src: Pred, Dst: header); |
| 1487 | } |
| 1488 | |
| 1489 | for (BasicBlock *Succ : ExtractedFuncRetVals) { |
| 1490 | for (BasicBlock *Block : predecessors(BB: Succ)) { |
| 1491 | if (!Blocks.count(key: Block)) |
| 1492 | continue; |
| 1493 | |
| 1494 | // Update the branch weight for this successor. |
| 1495 | BlockFrequency &BF = ExitWeights[Succ]; |
| 1496 | BF += BFI->getBlockFreq(BB: Block) * BPI->getEdgeProbability(Src: Block, Dst: Succ); |
| 1497 | } |
| 1498 | } |
| 1499 | } |
| 1500 | |
| 1501 | // Determine position for the replacement code. Do so before header is moved |
| 1502 | // to the new function. |
| 1503 | BasicBlock *ReplIP = header; |
| 1504 | while (ReplIP && Blocks.count(key: ReplIP)) |
| 1505 | ReplIP = ReplIP->getNextNode(); |
| 1506 | |
| 1507 | // Construct new function based on inputs/outputs & add allocas for all defs. |
| 1508 | std::string SuffixToUse = |
| 1509 | Suffix.empty() |
| 1510 | ? (header->getName().empty() ? "extracted" : header->getName().str()) |
| 1511 | : Suffix; |
| 1512 | |
| 1513 | ValueSet StructValues; |
| 1514 | StructType *StructTy = nullptr; |
| 1515 | Function *newFunction = constructFunctionDeclaration( |
| 1516 | inputs, outputs, EntryFreq, Name: oldFunction->getName() + "." + SuffixToUse, |
| 1517 | StructValues, StructTy); |
| 1518 | SmallVector<Value *> NewValues; |
| 1519 | |
| 1520 | emitFunctionBody(inputs, outputs, StructValues, newFunction, StructArgTy: StructTy, header, |
| 1521 | SinkingCands, NewValues); |
| 1522 | |
| 1523 | std::vector<Value *> Reloads; |
| 1524 | CallInst *TheCall = emitReplacerCall( |
| 1525 | inputs, outputs, StructValues, newFunction, StructArgTy: StructTy, oldFunction, ReplIP, |
| 1526 | EntryFreq, LifetimesStart: LifetimesStart.getArrayRef(), Reloads); |
| 1527 | |
| 1528 | insertReplacerCall(oldFunction, header, codeReplacer: TheCall->getParent(), outputs, |
| 1529 | Reloads, ExitWeights); |
| 1530 | |
| 1531 | fixupDebugInfoPostExtraction(OldFunc&: *oldFunction, NewFunc&: *newFunction, TheCall&: *TheCall, Inputs: inputs, |
| 1532 | NewValues); |
| 1533 | |
| 1534 | LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n" ); |
| 1535 | LLVM_DEBUG(newFunction->dump()); |
| 1536 | LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n" ); |
| 1537 | LLVM_DEBUG(oldFunction->dump()); |
| 1538 | LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) |
| 1539 | report_fatal_error("Stale Asumption cache for old Function!" )); |
| 1540 | return newFunction; |
| 1541 | } |
| 1542 | |
| 1543 | void CodeExtractor::(BasicBlock *&) { |
| 1544 | // If we have any return instructions in the region, split those blocks so |
| 1545 | // that the return is not in the region. |
| 1546 | splitReturnBlocks(); |
| 1547 | |
| 1548 | // If we have to split PHI nodes of the entry or exit blocks, do so now. |
| 1549 | severSplitPHINodesOfEntry(Header&: header); |
| 1550 | |
| 1551 | // If a PHI in an exit block has multiple incoming values from the outlined |
| 1552 | // region, create a new PHI for those values within the region such that only |
| 1553 | // PHI itself becomes an output value, not each of its incoming values |
| 1554 | // individually. |
| 1555 | computeExtractedFuncRetVals(); |
| 1556 | severSplitPHINodesOfExits(); |
| 1557 | } |
| 1558 | |
| 1559 | void CodeExtractor::() { |
| 1560 | ExtractedFuncRetVals.clear(); |
| 1561 | |
| 1562 | SmallPtrSet<BasicBlock *, 2> ExitBlocks; |
| 1563 | for (BasicBlock *Block : Blocks) { |
| 1564 | for (BasicBlock *Succ : successors(BB: Block)) { |
| 1565 | if (Blocks.count(key: Succ)) |
| 1566 | continue; |
| 1567 | |
| 1568 | bool IsNew = ExitBlocks.insert(Ptr: Succ).second; |
| 1569 | if (IsNew) |
| 1570 | ExtractedFuncRetVals.push_back(Elt: Succ); |
| 1571 | } |
| 1572 | } |
| 1573 | } |
| 1574 | |
| 1575 | Type *CodeExtractor::() { |
| 1576 | LLVMContext &Context = Blocks.front()->getContext(); |
| 1577 | |
| 1578 | assert(ExtractedFuncRetVals.size() < 0xffff && |
| 1579 | "too many exit blocks for switch" ); |
| 1580 | switch (ExtractedFuncRetVals.size()) { |
| 1581 | case 0: |
| 1582 | case 1: |
| 1583 | return Type::getVoidTy(C&: Context); |
| 1584 | case 2: |
| 1585 | // Conditional branch, return a bool |
| 1586 | return Type::getInt1Ty(C&: Context); |
| 1587 | default: |
| 1588 | return Type::getInt16Ty(C&: Context); |
| 1589 | } |
| 1590 | } |
| 1591 | |
| 1592 | void CodeExtractor::emitFunctionBody( |
| 1593 | const ValueSet &inputs, const ValueSet &outputs, |
| 1594 | const ValueSet &StructValues, Function *newFunction, |
| 1595 | StructType *StructArgTy, BasicBlock *, const ValueSet &SinkingCands, |
| 1596 | SmallVectorImpl<Value *> &NewValues) { |
| 1597 | Function *oldFunction = header->getParent(); |
| 1598 | LLVMContext &Context = oldFunction->getContext(); |
| 1599 | |
| 1600 | // The new function needs a root node because other nodes can branch to the |
| 1601 | // head of the region, but the entry node of a function cannot have preds. |
| 1602 | BasicBlock *newFuncRoot = |
| 1603 | BasicBlock::Create(Context, Name: "newFuncRoot" , Parent: newFunction); |
| 1604 | |
| 1605 | // Now sink all instructions which only have non-phi uses inside the region. |
| 1606 | // Group the allocas at the start of the block, so that any bitcast uses of |
| 1607 | // the allocas are well-defined. |
| 1608 | for (auto *II : SinkingCands) { |
| 1609 | if (!isa<AllocaInst>(Val: II)) { |
| 1610 | cast<Instruction>(Val: II)->moveBefore(BB&: *newFuncRoot, |
| 1611 | I: newFuncRoot->getFirstInsertionPt()); |
| 1612 | } |
| 1613 | } |
| 1614 | for (auto *II : SinkingCands) { |
| 1615 | if (auto *AI = dyn_cast<AllocaInst>(Val: II)) { |
| 1616 | AI->moveBefore(BB&: *newFuncRoot, I: newFuncRoot->getFirstInsertionPt()); |
| 1617 | } |
| 1618 | } |
| 1619 | |
| 1620 | Function::arg_iterator ScalarAI = newFunction->arg_begin(); |
| 1621 | Argument *AggArg = StructValues.empty() |
| 1622 | ? nullptr |
| 1623 | : newFunction->getArg(i: newFunction->arg_size() - 1); |
| 1624 | |
| 1625 | // Rewrite all users of the inputs in the extracted region to use the |
| 1626 | // arguments (or appropriate addressing into struct) instead. |
| 1627 | for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) { |
| 1628 | Value *RewriteVal; |
| 1629 | if (StructValues.contains(key: inputs[i])) { |
| 1630 | Value *Idx[2]; |
| 1631 | Idx[0] = Constant::getNullValue(Ty: Type::getInt32Ty(C&: header->getContext())); |
| 1632 | Idx[1] = ConstantInt::get(Ty: Type::getInt32Ty(C&: header->getContext()), V: aggIdx); |
| 1633 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
| 1634 | PointeeType: StructArgTy, Ptr: AggArg, IdxList: Idx, NameStr: "gep_" + inputs[i]->getName(), InsertBefore: newFuncRoot); |
| 1635 | LoadInst *LoadGEP = |
| 1636 | new LoadInst(StructArgTy->getElementType(N: aggIdx), GEP, |
| 1637 | "loadgep_" + inputs[i]->getName(), newFuncRoot); |
| 1638 | // If we load pointer, we can add optional !align metadata |
| 1639 | // The existence of the !align metadata on the instruction tells |
| 1640 | // the optimizer that the value loaded is known to be aligned to |
| 1641 | // a boundary specified by the integer value in the metadata node. |
| 1642 | // Example: |
| 1643 | // %res = load ptr, ptr %input, align 8, !align !align_md_node |
| 1644 | // ^ ^ |
| 1645 | // | | |
| 1646 | // alignment of %input address | |
| 1647 | // | |
| 1648 | // alignment of %res object |
| 1649 | if (StructArgTy->getElementType(N: aggIdx)->isPointerTy()) { |
| 1650 | unsigned AlignmentValue; |
| 1651 | const Triple &TargetTriple = |
| 1652 | newFunction->getParent()->getTargetTriple(); |
| 1653 | const DataLayout &DL = header->getDataLayout(); |
| 1654 | // Pointers without casting can provide more information about |
| 1655 | // alignment. Use pointers without casts if given target preserves |
| 1656 | // alignment information for cast the operation. |
| 1657 | if (isAlignmentPreservedForAddrCast(TargetTriple)) |
| 1658 | AlignmentValue = |
| 1659 | inputs[i]->stripPointerCasts()->getPointerAlignment(DL).value(); |
| 1660 | else |
| 1661 | AlignmentValue = inputs[i]->getPointerAlignment(DL).value(); |
| 1662 | MDBuilder MDB(header->getContext()); |
| 1663 | LoadGEP->setMetadata( |
| 1664 | KindID: LLVMContext::MD_align, |
| 1665 | Node: MDNode::get( |
| 1666 | Context&: header->getContext(), |
| 1667 | MDs: MDB.createConstant(C: ConstantInt::get( |
| 1668 | Ty: Type::getInt64Ty(C&: header->getContext()), V: AlignmentValue)))); |
| 1669 | } |
| 1670 | RewriteVal = LoadGEP; |
| 1671 | ++aggIdx; |
| 1672 | } else |
| 1673 | RewriteVal = &*ScalarAI++; |
| 1674 | |
| 1675 | NewValues.push_back(Elt: RewriteVal); |
| 1676 | } |
| 1677 | |
| 1678 | moveCodeToFunction(newFunction); |
| 1679 | |
| 1680 | for (unsigned i = 0, e = inputs.size(); i != e; ++i) { |
| 1681 | Value *RewriteVal = NewValues[i]; |
| 1682 | |
| 1683 | std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); |
| 1684 | for (User *use : Users) |
| 1685 | if (Instruction *inst = dyn_cast<Instruction>(Val: use)) |
| 1686 | if (Blocks.count(key: inst->getParent())) |
| 1687 | inst->replaceUsesOfWith(From: inputs[i], To: RewriteVal); |
| 1688 | } |
| 1689 | |
| 1690 | // Since there may be multiple exits from the original region, make the new |
| 1691 | // function return an unsigned, switch on that number. This loop iterates |
| 1692 | // over all of the blocks in the extracted region, updating any terminator |
| 1693 | // instructions in the to-be-extracted region that branch to blocks that are |
| 1694 | // not in the region to be extracted. |
| 1695 | std::map<BasicBlock *, BasicBlock *> ExitBlockMap; |
| 1696 | |
| 1697 | // Iterate over the previously collected targets, and create new blocks inside |
| 1698 | // the function to branch to. |
| 1699 | for (auto P : enumerate(First&: ExtractedFuncRetVals)) { |
| 1700 | BasicBlock *OldTarget = P.value(); |
| 1701 | size_t SuccNum = P.index(); |
| 1702 | |
| 1703 | BasicBlock *NewTarget = BasicBlock::Create( |
| 1704 | Context, Name: OldTarget->getName() + ".exitStub" , Parent: newFunction); |
| 1705 | ExitBlockMap[OldTarget] = NewTarget; |
| 1706 | |
| 1707 | Value *brVal = nullptr; |
| 1708 | Type *RetTy = getSwitchType(); |
| 1709 | assert(ExtractedFuncRetVals.size() < 0xffff && |
| 1710 | "too many exit blocks for switch" ); |
| 1711 | switch (ExtractedFuncRetVals.size()) { |
| 1712 | case 0: |
| 1713 | case 1: |
| 1714 | // No value needed. |
| 1715 | break; |
| 1716 | case 2: // Conditional branch, return a bool |
| 1717 | brVal = ConstantInt::get(Ty: RetTy, V: !SuccNum); |
| 1718 | break; |
| 1719 | default: |
| 1720 | brVal = ConstantInt::get(Ty: RetTy, V: SuccNum); |
| 1721 | break; |
| 1722 | } |
| 1723 | |
| 1724 | ReturnInst::Create(C&: Context, retVal: brVal, InsertBefore: NewTarget); |
| 1725 | } |
| 1726 | |
| 1727 | for (BasicBlock *Block : Blocks) { |
| 1728 | Instruction *TI = Block->getTerminator(); |
| 1729 | for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { |
| 1730 | if (Blocks.count(key: TI->getSuccessor(Idx: i))) |
| 1731 | continue; |
| 1732 | BasicBlock *OldTarget = TI->getSuccessor(Idx: i); |
| 1733 | // add a new basic block which returns the appropriate value |
| 1734 | BasicBlock *NewTarget = ExitBlockMap[OldTarget]; |
| 1735 | assert(NewTarget && "Unknown target block!" ); |
| 1736 | |
| 1737 | // rewrite the original branch instruction with this new target |
| 1738 | TI->setSuccessor(Idx: i, BB: NewTarget); |
| 1739 | } |
| 1740 | } |
| 1741 | |
| 1742 | // Loop over all of the PHI nodes in the header and exit blocks, and change |
| 1743 | // any references to the old incoming edge to be the new incoming edge. |
| 1744 | for (BasicBlock::iterator I = header->begin(); isa<PHINode>(Val: I); ++I) { |
| 1745 | PHINode *PN = cast<PHINode>(Val&: I); |
| 1746 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
| 1747 | if (!Blocks.count(key: PN->getIncomingBlock(i))) |
| 1748 | PN->setIncomingBlock(i, BB: newFuncRoot); |
| 1749 | } |
| 1750 | |
| 1751 | // Connect newFunction entry block to new header. |
| 1752 | BranchInst *BranchI = BranchInst::Create(IfTrue: header, InsertBefore: newFuncRoot); |
| 1753 | applyFirstDebugLoc(oldFunction, Blocks: Blocks.getArrayRef(), BranchI); |
| 1754 | |
| 1755 | // Store the arguments right after the definition of output value. |
| 1756 | // This should be proceeded after creating exit stubs to be ensure that invoke |
| 1757 | // result restore will be placed in the outlined function. |
| 1758 | ScalarAI = newFunction->arg_begin(); |
| 1759 | unsigned AggIdx = 0; |
| 1760 | |
| 1761 | for (Value *Input : inputs) { |
| 1762 | if (StructValues.contains(key: Input)) |
| 1763 | ++AggIdx; |
| 1764 | else |
| 1765 | ++ScalarAI; |
| 1766 | } |
| 1767 | |
| 1768 | for (Value *Output : outputs) { |
| 1769 | // Find proper insertion point. |
| 1770 | // In case Output is an invoke, we insert the store at the beginning in the |
| 1771 | // 'normal destination' BB. Otherwise we insert the store right after |
| 1772 | // Output. |
| 1773 | BasicBlock::iterator InsertPt; |
| 1774 | if (auto *InvokeI = dyn_cast<InvokeInst>(Val: Output)) |
| 1775 | InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); |
| 1776 | else if (auto *Phi = dyn_cast<PHINode>(Val: Output)) |
| 1777 | InsertPt = Phi->getParent()->getFirstInsertionPt(); |
| 1778 | else if (auto *OutI = dyn_cast<Instruction>(Val: Output)) |
| 1779 | InsertPt = std::next(x: OutI->getIterator()); |
| 1780 | else { |
| 1781 | // Globals don't need to be updated, just advance to the next argument. |
| 1782 | if (StructValues.contains(key: Output)) |
| 1783 | ++AggIdx; |
| 1784 | else |
| 1785 | ++ScalarAI; |
| 1786 | continue; |
| 1787 | } |
| 1788 | |
| 1789 | assert((InsertPt->getFunction() == newFunction || |
| 1790 | Blocks.count(InsertPt->getParent())) && |
| 1791 | "InsertPt should be in new function" ); |
| 1792 | |
| 1793 | if (StructValues.contains(key: Output)) { |
| 1794 | assert(AggArg && "Number of aggregate output arguments should match " |
| 1795 | "the number of defined values" ); |
| 1796 | Value *Idx[2]; |
| 1797 | Idx[0] = Constant::getNullValue(Ty: Type::getInt32Ty(C&: Context)); |
| 1798 | Idx[1] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AggIdx); |
| 1799 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
| 1800 | PointeeType: StructArgTy, Ptr: AggArg, IdxList: Idx, NameStr: "gep_" + Output->getName(), InsertBefore: InsertPt); |
| 1801 | new StoreInst(Output, GEP, InsertPt); |
| 1802 | ++AggIdx; |
| 1803 | } else { |
| 1804 | assert(ScalarAI != newFunction->arg_end() && |
| 1805 | "Number of scalar output arguments should match " |
| 1806 | "the number of defined values" ); |
| 1807 | new StoreInst(Output, &*ScalarAI, InsertPt); |
| 1808 | ++ScalarAI; |
| 1809 | } |
| 1810 | } |
| 1811 | |
| 1812 | if (ExtractedFuncRetVals.empty()) { |
| 1813 | // Mark the new function `noreturn` if applicable. Terminators which resume |
| 1814 | // exception propagation are treated as returning instructions. This is to |
| 1815 | // avoid inserting traps after calls to outlined functions which unwind. |
| 1816 | if (none_of(Range&: Blocks, P: [](const BasicBlock *BB) { |
| 1817 | const Instruction *Term = BB->getTerminator(); |
| 1818 | return isa<ReturnInst>(Val: Term) || isa<ResumeInst>(Val: Term); |
| 1819 | })) |
| 1820 | newFunction->setDoesNotReturn(); |
| 1821 | } |
| 1822 | } |
| 1823 | |
| 1824 | CallInst *CodeExtractor::( |
| 1825 | const ValueSet &inputs, const ValueSet &outputs, |
| 1826 | const ValueSet &StructValues, Function *newFunction, |
| 1827 | StructType *StructArgTy, Function *oldFunction, BasicBlock *ReplIP, |
| 1828 | BlockFrequency EntryFreq, ArrayRef<Value *> LifetimesStart, |
| 1829 | std::vector<Value *> &Reloads) { |
| 1830 | LLVMContext &Context = oldFunction->getContext(); |
| 1831 | Module *M = oldFunction->getParent(); |
| 1832 | const DataLayout &DL = M->getDataLayout(); |
| 1833 | |
| 1834 | // This takes place of the original loop |
| 1835 | BasicBlock *codeReplacer = |
| 1836 | BasicBlock::Create(Context, Name: "codeRepl" , Parent: oldFunction, InsertBefore: ReplIP); |
| 1837 | if (AllocationBlock) |
| 1838 | assert(AllocationBlock->getParent() == oldFunction && |
| 1839 | "AllocationBlock is not in the same function" ); |
| 1840 | BasicBlock *AllocaBlock = |
| 1841 | AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock(); |
| 1842 | |
| 1843 | // Update the entry count of the function. |
| 1844 | if (BFI) |
| 1845 | BFI->setBlockFreq(BB: codeReplacer, Freq: EntryFreq); |
| 1846 | |
| 1847 | std::vector<Value *> params; |
| 1848 | |
| 1849 | // Add inputs as params, or to be filled into the struct |
| 1850 | for (Value *input : inputs) { |
| 1851 | if (StructValues.contains(key: input)) |
| 1852 | continue; |
| 1853 | |
| 1854 | params.push_back(x: input); |
| 1855 | } |
| 1856 | |
| 1857 | // Create allocas for the outputs |
| 1858 | std::vector<Value *> ReloadOutputs; |
| 1859 | for (Value *output : outputs) { |
| 1860 | if (StructValues.contains(key: output)) |
| 1861 | continue; |
| 1862 | |
| 1863 | AllocaInst *alloca = new AllocaInst( |
| 1864 | output->getType(), DL.getAllocaAddrSpace(), nullptr, |
| 1865 | output->getName() + ".loc" , AllocaBlock->getFirstInsertionPt()); |
| 1866 | params.push_back(x: alloca); |
| 1867 | ReloadOutputs.push_back(x: alloca); |
| 1868 | } |
| 1869 | |
| 1870 | AllocaInst *Struct = nullptr; |
| 1871 | if (!StructValues.empty()) { |
| 1872 | Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, |
| 1873 | "structArg" , AllocaBlock->getFirstInsertionPt()); |
| 1874 | if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) { |
| 1875 | auto *StructSpaceCast = new AddrSpaceCastInst( |
| 1876 | Struct, PointerType ::get(C&: Context, AddressSpace: 0), "structArg.ascast" ); |
| 1877 | StructSpaceCast->insertAfter(InsertPos: Struct->getIterator()); |
| 1878 | params.push_back(x: StructSpaceCast); |
| 1879 | } else { |
| 1880 | params.push_back(x: Struct); |
| 1881 | } |
| 1882 | |
| 1883 | unsigned AggIdx = 0; |
| 1884 | for (Value *input : inputs) { |
| 1885 | if (!StructValues.contains(key: input)) |
| 1886 | continue; |
| 1887 | |
| 1888 | Value *Idx[2]; |
| 1889 | Idx[0] = Constant::getNullValue(Ty: Type::getInt32Ty(C&: Context)); |
| 1890 | Idx[1] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AggIdx); |
| 1891 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
| 1892 | PointeeType: StructArgTy, Ptr: Struct, IdxList: Idx, NameStr: "gep_" + input->getName()); |
| 1893 | GEP->insertInto(ParentBB: codeReplacer, It: codeReplacer->end()); |
| 1894 | new StoreInst(input, GEP, codeReplacer); |
| 1895 | |
| 1896 | ++AggIdx; |
| 1897 | } |
| 1898 | } |
| 1899 | |
| 1900 | // Emit the call to the function |
| 1901 | CallInst *call = CallInst::Create( |
| 1902 | Func: newFunction, Args: params, NameStr: ExtractedFuncRetVals.size() > 1 ? "targetBlock" : "" , |
| 1903 | InsertBefore: codeReplacer); |
| 1904 | |
| 1905 | // Set swifterror parameter attributes. |
| 1906 | unsigned ParamIdx = 0; |
| 1907 | unsigned AggIdx = 0; |
| 1908 | for (auto input : inputs) { |
| 1909 | if (StructValues.contains(key: input)) { |
| 1910 | ++AggIdx; |
| 1911 | } else { |
| 1912 | if (input->isSwiftError()) |
| 1913 | call->addParamAttr(ArgNo: ParamIdx, Kind: Attribute::SwiftError); |
| 1914 | ++ParamIdx; |
| 1915 | } |
| 1916 | } |
| 1917 | |
| 1918 | // Add debug location to the new call, if the original function has debug |
| 1919 | // info. In that case, the terminator of the entry block of the extracted |
| 1920 | // function contains the first debug location of the extracted function, |
| 1921 | // set in extractCodeRegion. |
| 1922 | if (codeReplacer->getParent()->getSubprogram()) { |
| 1923 | if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) |
| 1924 | call->setDebugLoc(DL); |
| 1925 | } |
| 1926 | |
| 1927 | // Reload the outputs passed in by reference, use the struct if output is in |
| 1928 | // the aggregate or reload from the scalar argument. |
| 1929 | for (unsigned i = 0, e = outputs.size(), scalarIdx = 0; i != e; ++i) { |
| 1930 | Value *Output = nullptr; |
| 1931 | if (StructValues.contains(key: outputs[i])) { |
| 1932 | Value *Idx[2]; |
| 1933 | Idx[0] = Constant::getNullValue(Ty: Type::getInt32Ty(C&: Context)); |
| 1934 | Idx[1] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AggIdx); |
| 1935 | GetElementPtrInst *GEP = GetElementPtrInst::Create( |
| 1936 | PointeeType: StructArgTy, Ptr: Struct, IdxList: Idx, NameStr: "gep_reload_" + outputs[i]->getName()); |
| 1937 | GEP->insertInto(ParentBB: codeReplacer, It: codeReplacer->end()); |
| 1938 | Output = GEP; |
| 1939 | ++AggIdx; |
| 1940 | } else { |
| 1941 | Output = ReloadOutputs[scalarIdx]; |
| 1942 | ++scalarIdx; |
| 1943 | } |
| 1944 | LoadInst *load = |
| 1945 | new LoadInst(outputs[i]->getType(), Output, |
| 1946 | outputs[i]->getName() + ".reload" , codeReplacer); |
| 1947 | Reloads.push_back(x: load); |
| 1948 | } |
| 1949 | |
| 1950 | // Now we can emit a switch statement using the call as a value. |
| 1951 | SwitchInst *TheSwitch = |
| 1952 | SwitchInst::Create(Value: Constant::getNullValue(Ty: Type::getInt16Ty(C&: Context)), |
| 1953 | Default: codeReplacer, NumCases: 0, InsertBefore: codeReplacer); |
| 1954 | for (auto P : enumerate(First&: ExtractedFuncRetVals)) { |
| 1955 | BasicBlock *OldTarget = P.value(); |
| 1956 | size_t SuccNum = P.index(); |
| 1957 | |
| 1958 | TheSwitch->addCase(OnVal: ConstantInt::get(Ty: Type::getInt16Ty(C&: Context), V: SuccNum), |
| 1959 | Dest: OldTarget); |
| 1960 | } |
| 1961 | |
| 1962 | // Now that we've done the deed, simplify the switch instruction. |
| 1963 | Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); |
| 1964 | switch (ExtractedFuncRetVals.size()) { |
| 1965 | case 0: |
| 1966 | // There are no successors (the block containing the switch itself), which |
| 1967 | // means that previously this was the last part of the function, and hence |
| 1968 | // this should be rewritten as a `ret` or `unreachable`. |
| 1969 | if (newFunction->doesNotReturn()) { |
| 1970 | // If fn is no return, end with an unreachable terminator. |
| 1971 | (void)new UnreachableInst(Context, TheSwitch->getIterator()); |
| 1972 | } else if (OldFnRetTy->isVoidTy()) { |
| 1973 | // We have no return value. |
| 1974 | ReturnInst::Create(C&: Context, retVal: nullptr, |
| 1975 | InsertBefore: TheSwitch->getIterator()); // Return void |
| 1976 | } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { |
| 1977 | // return what we have |
| 1978 | ReturnInst::Create(C&: Context, retVal: TheSwitch->getCondition(), |
| 1979 | InsertBefore: TheSwitch->getIterator()); |
| 1980 | } else { |
| 1981 | // Otherwise we must have code extracted an unwind or something, just |
| 1982 | // return whatever we want. |
| 1983 | ReturnInst::Create(C&: Context, retVal: Constant::getNullValue(Ty: OldFnRetTy), |
| 1984 | InsertBefore: TheSwitch->getIterator()); |
| 1985 | } |
| 1986 | |
| 1987 | TheSwitch->eraseFromParent(); |
| 1988 | break; |
| 1989 | case 1: |
| 1990 | // Only a single destination, change the switch into an unconditional |
| 1991 | // branch. |
| 1992 | BranchInst::Create(IfTrue: TheSwitch->getSuccessor(idx: 1), InsertBefore: TheSwitch->getIterator()); |
| 1993 | TheSwitch->eraseFromParent(); |
| 1994 | break; |
| 1995 | case 2: |
| 1996 | // Only two destinations, convert to a condition branch. |
| 1997 | // Remark: This also swaps the target branches: |
| 1998 | // 0 -> false -> getSuccessor(2); 1 -> true -> getSuccessor(1) |
| 1999 | BranchInst::Create(IfTrue: TheSwitch->getSuccessor(idx: 1), IfFalse: TheSwitch->getSuccessor(idx: 2), |
| 2000 | Cond: call, InsertBefore: TheSwitch->getIterator()); |
| 2001 | TheSwitch->eraseFromParent(); |
| 2002 | break; |
| 2003 | default: |
| 2004 | // Otherwise, make the default destination of the switch instruction be one |
| 2005 | // of the other successors. |
| 2006 | TheSwitch->setCondition(call); |
| 2007 | TheSwitch->setDefaultDest( |
| 2008 | TheSwitch->getSuccessor(idx: ExtractedFuncRetVals.size())); |
| 2009 | // Remove redundant case |
| 2010 | TheSwitch->removeCase( |
| 2011 | I: SwitchInst::CaseIt(TheSwitch, ExtractedFuncRetVals.size() - 1)); |
| 2012 | break; |
| 2013 | } |
| 2014 | |
| 2015 | // Insert lifetime markers around the reloads of any output values. The |
| 2016 | // allocas output values are stored in are only in-use in the codeRepl block. |
| 2017 | insertLifetimeMarkersSurroundingCall(M, LifetimesStart: ReloadOutputs, LifetimesEnd: ReloadOutputs, TheCall: call); |
| 2018 | |
| 2019 | // Replicate the effects of any lifetime start/end markers which referenced |
| 2020 | // input objects in the extraction region by placing markers around the call. |
| 2021 | insertLifetimeMarkersSurroundingCall(M: oldFunction->getParent(), LifetimesStart, |
| 2022 | LifetimesEnd: {}, TheCall: call); |
| 2023 | |
| 2024 | return call; |
| 2025 | } |
| 2026 | |
| 2027 | void CodeExtractor::( |
| 2028 | Function *oldFunction, BasicBlock *, BasicBlock *codeReplacer, |
| 2029 | const ValueSet &outputs, ArrayRef<Value *> Reloads, |
| 2030 | const DenseMap<BasicBlock *, BlockFrequency> &ExitWeights) { |
| 2031 | |
| 2032 | // Rewrite branches to basic blocks outside of the loop to new dummy blocks |
| 2033 | // within the new function. This must be done before we lose track of which |
| 2034 | // blocks were originally in the code region. |
| 2035 | std::vector<User *> Users(header->user_begin(), header->user_end()); |
| 2036 | for (auto &U : Users) |
| 2037 | // The BasicBlock which contains the branch is not in the region |
| 2038 | // modify the branch target to a new block |
| 2039 | if (Instruction *I = dyn_cast<Instruction>(Val: U)) |
| 2040 | if (I->isTerminator() && I->getFunction() == oldFunction && |
| 2041 | !Blocks.count(key: I->getParent())) |
| 2042 | I->replaceUsesOfWith(From: header, To: codeReplacer); |
| 2043 | |
| 2044 | // When moving the code region it is sufficient to replace all uses to the |
| 2045 | // extracted function values. Since the original definition's block |
| 2046 | // dominated its use, it will also be dominated by codeReplacer's switch |
| 2047 | // which joined multiple exit blocks. |
| 2048 | for (BasicBlock *ExitBB : ExtractedFuncRetVals) |
| 2049 | for (PHINode &PN : ExitBB->phis()) { |
| 2050 | Value *IncomingCodeReplacerVal = nullptr; |
| 2051 | for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { |
| 2052 | // Ignore incoming values from outside of the extracted region. |
| 2053 | if (!Blocks.count(key: PN.getIncomingBlock(i))) |
| 2054 | continue; |
| 2055 | |
| 2056 | // Ensure that there is only one incoming value from codeReplacer. |
| 2057 | if (!IncomingCodeReplacerVal) { |
| 2058 | PN.setIncomingBlock(i, BB: codeReplacer); |
| 2059 | IncomingCodeReplacerVal = PN.getIncomingValue(i); |
| 2060 | } else |
| 2061 | assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) && |
| 2062 | "PHI has two incompatbile incoming values from codeRepl" ); |
| 2063 | } |
| 2064 | } |
| 2065 | |
| 2066 | for (unsigned i = 0, e = outputs.size(); i != e; ++i) { |
| 2067 | Value *load = Reloads[i]; |
| 2068 | std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); |
| 2069 | for (User *U : Users) { |
| 2070 | Instruction *inst = cast<Instruction>(Val: U); |
| 2071 | if (inst->getParent()->getParent() == oldFunction) |
| 2072 | inst->replaceUsesOfWith(From: outputs[i], To: load); |
| 2073 | } |
| 2074 | } |
| 2075 | |
| 2076 | // Update the branch weights for the exit block. |
| 2077 | if (BFI && ExtractedFuncRetVals.size() > 1) |
| 2078 | calculateNewCallTerminatorWeights(CodeReplacer: codeReplacer, ExitWeights, BPI); |
| 2079 | } |
| 2080 | |
| 2081 | bool CodeExtractor::(const Function &OldFunc, |
| 2082 | const Function &NewFunc, |
| 2083 | AssumptionCache *AC) { |
| 2084 | for (auto AssumeVH : AC->assumptions()) { |
| 2085 | auto *I = dyn_cast_or_null<CallInst>(Val&: AssumeVH); |
| 2086 | if (!I) |
| 2087 | continue; |
| 2088 | |
| 2089 | // There shouldn't be any llvm.assume intrinsics in the new function. |
| 2090 | if (I->getFunction() != &OldFunc) |
| 2091 | return true; |
| 2092 | |
| 2093 | // There shouldn't be any stale affected values in the assumption cache |
| 2094 | // that were previously in the old function, but that have now been moved |
| 2095 | // to the new function. |
| 2096 | for (auto AffectedValVH : AC->assumptionsFor(V: I->getOperand(i_nocapture: 0))) { |
| 2097 | auto *AffectedCI = dyn_cast_or_null<CallInst>(Val&: AffectedValVH); |
| 2098 | if (!AffectedCI) |
| 2099 | continue; |
| 2100 | if (AffectedCI->getFunction() != &OldFunc) |
| 2101 | return true; |
| 2102 | auto *AssumedInst = cast<Instruction>(Val: AffectedCI->getOperand(i_nocapture: 0)); |
| 2103 | if (AssumedInst->getFunction() != &OldFunc) |
| 2104 | return true; |
| 2105 | } |
| 2106 | } |
| 2107 | return false; |
| 2108 | } |
| 2109 | |
| 2110 | void CodeExtractor::(Value *Arg) { |
| 2111 | ExcludeArgsFromAggregate.insert(X: Arg); |
| 2112 | } |
| 2113 | |