| 1 | //===- InlineFunction.cpp - Code to perform function inlining -------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements inlining of a function into a call site, resolving |
| 10 | // parameters and the return value as appropriate. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/ADT/DenseMap.h" |
| 15 | #include "llvm/ADT/STLExtras.h" |
| 16 | #include "llvm/ADT/SetVector.h" |
| 17 | #include "llvm/ADT/SmallPtrSet.h" |
| 18 | #include "llvm/ADT/SmallVector.h" |
| 19 | #include "llvm/ADT/StringExtras.h" |
| 20 | #include "llvm/ADT/iterator_range.h" |
| 21 | #include "llvm/Analysis/AliasAnalysis.h" |
| 22 | #include "llvm/Analysis/AssumptionCache.h" |
| 23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 24 | #include "llvm/Analysis/CallGraph.h" |
| 25 | #include "llvm/Analysis/CaptureTracking.h" |
| 26 | #include "llvm/Analysis/CtxProfAnalysis.h" |
| 27 | #include "llvm/Analysis/IndirectCallVisitor.h" |
| 28 | #include "llvm/Analysis/InstructionSimplify.h" |
| 29 | #include "llvm/Analysis/MemoryProfileInfo.h" |
| 30 | #include "llvm/Analysis/ObjCARCAnalysisUtils.h" |
| 31 | #include "llvm/Analysis/ObjCARCUtil.h" |
| 32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 33 | #include "llvm/Analysis/ValueTracking.h" |
| 34 | #include "llvm/Analysis/VectorUtils.h" |
| 35 | #include "llvm/IR/Argument.h" |
| 36 | #include "llvm/IR/AttributeMask.h" |
| 37 | #include "llvm/IR/Attributes.h" |
| 38 | #include "llvm/IR/BasicBlock.h" |
| 39 | #include "llvm/IR/CFG.h" |
| 40 | #include "llvm/IR/Constant.h" |
| 41 | #include "llvm/IR/ConstantRange.h" |
| 42 | #include "llvm/IR/Constants.h" |
| 43 | #include "llvm/IR/DataLayout.h" |
| 44 | #include "llvm/IR/DebugInfo.h" |
| 45 | #include "llvm/IR/DebugInfoMetadata.h" |
| 46 | #include "llvm/IR/DebugLoc.h" |
| 47 | #include "llvm/IR/DerivedTypes.h" |
| 48 | #include "llvm/IR/Dominators.h" |
| 49 | #include "llvm/IR/EHPersonalities.h" |
| 50 | #include "llvm/IR/Function.h" |
| 51 | #include "llvm/IR/GlobalVariable.h" |
| 52 | #include "llvm/IR/IRBuilder.h" |
| 53 | #include "llvm/IR/InlineAsm.h" |
| 54 | #include "llvm/IR/InstrTypes.h" |
| 55 | #include "llvm/IR/Instruction.h" |
| 56 | #include "llvm/IR/Instructions.h" |
| 57 | #include "llvm/IR/IntrinsicInst.h" |
| 58 | #include "llvm/IR/Intrinsics.h" |
| 59 | #include "llvm/IR/LLVMContext.h" |
| 60 | #include "llvm/IR/MDBuilder.h" |
| 61 | #include "llvm/IR/Metadata.h" |
| 62 | #include "llvm/IR/Module.h" |
| 63 | #include "llvm/IR/PatternMatch.h" |
| 64 | #include "llvm/IR/ProfDataUtils.h" |
| 65 | #include "llvm/IR/Type.h" |
| 66 | #include "llvm/IR/User.h" |
| 67 | #include "llvm/IR/Value.h" |
| 68 | #include "llvm/Support/Casting.h" |
| 69 | #include "llvm/Support/CommandLine.h" |
| 70 | #include "llvm/Support/ErrorHandling.h" |
| 71 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
| 72 | #include "llvm/Transforms/Utils/Cloning.h" |
| 73 | #include "llvm/Transforms/Utils/Local.h" |
| 74 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 75 | #include <algorithm> |
| 76 | #include <cassert> |
| 77 | #include <cstdint> |
| 78 | #include <deque> |
| 79 | #include <iterator> |
| 80 | #include <limits> |
| 81 | #include <optional> |
| 82 | #include <string> |
| 83 | #include <utility> |
| 84 | #include <vector> |
| 85 | |
| 86 | #define DEBUG_TYPE "inline-function" |
| 87 | |
| 88 | using namespace llvm; |
| 89 | using namespace llvm::memprof; |
| 90 | using ProfileCount = Function::ProfileCount; |
| 91 | |
| 92 | static cl::opt<bool> |
| 93 | EnableNoAliasConversion("enable-noalias-to-md-conversion" , cl::init(Val: true), |
| 94 | cl::Hidden, |
| 95 | cl::desc("Convert noalias attributes to metadata during inlining." )); |
| 96 | |
| 97 | static cl::opt<bool> |
| 98 | UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining" , cl::Hidden, |
| 99 | cl::init(Val: true), |
| 100 | cl::desc("Use the llvm.experimental.noalias.scope.decl " |
| 101 | "intrinsic during inlining." )); |
| 102 | |
| 103 | // Disabled by default, because the added alignment assumptions may increase |
| 104 | // compile-time and block optimizations. This option is not suitable for use |
| 105 | // with frontends that emit comprehensive parameter alignment annotations. |
| 106 | static cl::opt<bool> |
| 107 | PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining" , |
| 108 | cl::init(Val: false), cl::Hidden, |
| 109 | cl::desc("Convert align attributes to assumptions during inlining." )); |
| 110 | |
| 111 | static cl::opt<unsigned> InlinerAttributeWindow( |
| 112 | "max-inst-checked-for-throw-during-inlining" , cl::Hidden, |
| 113 | cl::desc("the maximum number of instructions analyzed for may throw during " |
| 114 | "attribute inference in inlined body" ), |
| 115 | cl::init(Val: 4)); |
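| | // Note: the cl::opt flags above are hidden, internal tuning knobs. As an |
| | // illustrative (not authoritative) example, they can be toggled on a tool's |
| | // command line, e.g. "opt -passes=inline -enable-noalias-to-md-conversion=false". |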
| 116 | |
| 117 | namespace { |
| 118 | |
| 119 | /// A class for recording information about inlining a landing pad. |
| 120 | class LandingPadInliningInfo { |
| 121 | /// Destination of the invoke's unwind. |
| 122 | BasicBlock *OuterResumeDest; |
| 123 | |
| 124 | /// Destination for the callee's resume. |
| 125 | BasicBlock *InnerResumeDest = nullptr; |
| 126 | |
| 127 | /// LandingPadInst associated with the invoke. |
| 128 | LandingPadInst *CallerLPad = nullptr; |
| 129 | |
| 130 | /// PHI for EH values from landingpad insts. |
| 131 | PHINode *InnerEHValuesPHI = nullptr; |
| 132 | |
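| | /// Incoming values for the unwind destination's PHI nodes, captured from the |
| | /// original invoke's edge before that edge is removed. |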
| 133 | SmallVector<Value*, 8> UnwindDestPHIValues; |
| 134 | |
| 135 | public: |
| 136 | LandingPadInliningInfo(InvokeInst *II) |
| 137 | : OuterResumeDest(II->getUnwindDest()) { |
| 138 | // If there are PHI nodes in the unwind destination block, we need to keep |
| 139 | // track of which values came into them from the invoke before removing |
| 140 | // the edge from this block. |
| 141 | BasicBlock *InvokeBB = II->getParent(); |
| 142 | BasicBlock::iterator I = OuterResumeDest->begin(); |
| 143 | for (; isa<PHINode>(Val: I); ++I) { |
| 144 | // Save the value to use for this edge. |
| 145 | PHINode *PHI = cast<PHINode>(Val&: I); |
| 146 | UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB)); |
| 147 | } |
| 148 | |
| 149 | CallerLPad = cast<LandingPadInst>(Val&: I); |
| 150 | } |
| 151 | |
| 152 | /// The outer unwind destination is the target of |
| 153 | /// unwind edges introduced for calls within the inlined function. |
| 154 | BasicBlock *getOuterResumeDest() const { |
| 155 | return OuterResumeDest; |
| 156 | } |
| 157 | |
| 158 | BasicBlock *getInnerResumeDest(); |
| 159 | |
| 160 | LandingPadInst *getLandingPadInst() const { return CallerLPad; } |
| 161 | |
| 162 | /// Forward the 'resume' instruction to the caller's landing pad block. |
| 163 | /// When the landing pad block has only one predecessor, this is |
| 164 | /// a simple branch. When there is more than one predecessor, we need to |
| 165 | /// split the landing pad block after the landingpad instruction and jump |
| 166 | /// to there. |
| 167 | void forwardResume(ResumeInst *RI, |
| 168 | SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); |
| 169 | |
| 170 | /// Add incoming-PHI values to the unwind destination block for the given |
| 171 | /// basic block, using the values for the original invoke's source block. |
| 172 | void addIncomingPHIValuesFor(BasicBlock *BB) const { |
| 173 | addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest); |
| 174 | } |
| 175 | |
| 176 | void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { |
| 177 | BasicBlock::iterator I = dest->begin(); |
| 178 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
| 179 | PHINode *phi = cast<PHINode>(Val&: I); |
| 180 | phi->addIncoming(V: UnwindDestPHIValues[i], BB: src); |
| 181 | } |
| 182 | } |
| 183 | }; |
| 184 | } // end anonymous namespace |
| 185 | |
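| | /// Return the llvm.experimental.convergence.entry intrinsic in \p BB, if |
| | /// present; otherwise return nullptr. |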
| 186 | static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) { |
| 187 | BasicBlock::iterator It = BB.getFirstNonPHIIt(); |
| 188 | while (It != BB.end()) { |
| 189 | if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) { |
| 190 | if (IntrinsicCall->isEntry()) { |
| 191 | return IntrinsicCall; |
| 192 | } |
| 193 | } |
| 194 | It = std::next(x: It); |
| 195 | } |
| 196 | return nullptr; |
| 197 | } |
| 198 | |
| 199 | /// Get or create a target for the branch from ResumeInsts. |
| 200 | BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { |
| 201 | if (InnerResumeDest) return InnerResumeDest; |
| 202 | |
| 203 | // Split the landing pad. |
| 204 | BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); |
| 205 | InnerResumeDest = |
| 206 | OuterResumeDest->splitBasicBlock(I: SplitPoint, |
| 207 | BBName: OuterResumeDest->getName() + ".body" ); |
| 208 | |
| 209 | // The number of incoming edges we expect to the inner landing pad. |
| 210 | const unsigned PHICapacity = 2; |
| 211 | |
| 212 | // Create corresponding new PHIs for all the PHIs in the outer landing pad. |
| 213 | BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); |
| 214 | BasicBlock::iterator I = OuterResumeDest->begin(); |
| 215 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
| 216 | PHINode *OuterPHI = cast<PHINode>(Val&: I); |
| 217 | PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity, |
| 218 | NameStr: OuterPHI->getName() + ".lpad-body" ); |
| 219 | InnerPHI->insertBefore(InsertPos: InsertPoint); |
| 220 | OuterPHI->replaceAllUsesWith(V: InnerPHI); |
| 221 | InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest); |
| 222 | } |
| 223 | |
| 224 | // Create a PHI for the exception values. |
| 225 | InnerEHValuesPHI = |
| 226 | PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body" ); |
| 227 | InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint); |
| 228 | CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI); |
| 229 | InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest); |
| 230 | |
| 231 | // All done. |
| 232 | return InnerResumeDest; |
| 233 | } |
| 234 | |
| 235 | /// Forward the 'resume' instruction to the caller's landing pad block. |
| 236 | /// When the landing pad block has only one predecessor, this is a simple |
| 237 | /// branch. When there is more than one predecessor, we need to split the |
| 238 | /// landing pad block after the landingpad instruction and jump to there. |
| 239 | void LandingPadInliningInfo::forwardResume( |
| 240 | ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { |
| 241 | BasicBlock *Dest = getInnerResumeDest(); |
| 242 | BasicBlock *Src = RI->getParent(); |
| 243 | |
| 244 | auto *BI = BranchInst::Create(IfTrue: Dest, InsertBefore: Src); |
| 245 | BI->setDebugLoc(RI->getDebugLoc()); |
| 246 | |
| 247 | // Update the PHIs in the destination. They were inserted in an order which |
| 248 | // makes this work. |
| 249 | addIncomingPHIValuesForInto(src: Src, dest: Dest); |
| 250 | |
| 251 | InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src); |
| 252 | RI->eraseFromParent(); |
| 253 | } |
| 254 | |
| 255 | /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. |
| 256 | static Value *getParentPad(Value *EHPad) { |
| 257 | if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad)) |
| 258 | return FPI->getParentPad(); |
| 259 | return cast<CatchSwitchInst>(Val: EHPad)->getParentPad(); |
| 260 | } |
| 261 | |
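| | /// Memoized mapping from an EH pad to its resolved unwind destination token, |
| | /// or to nullptr when no definitive destination has been found for it yet. |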
| 262 | using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; |
| 263 | |
| 264 | /// Helper for getUnwindDestToken that does the descendant-ward part of |
| 265 | /// the search. |
| 266 | static Value *getUnwindDestTokenHelper(Instruction *EHPad, |
| 267 | UnwindDestMemoTy &MemoMap) { |
| 268 | SmallVector<Instruction *, 8> Worklist(1, EHPad); |
| 269 | |
| 270 | while (!Worklist.empty()) { |
| 271 | Instruction *CurrentPad = Worklist.pop_back_val(); |
| 272 | // We only put pads on the worklist that aren't in the MemoMap. When |
| 273 | // we find an unwind dest for a pad we may update its ancestors, but |
| 274 | // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, |
| 275 | // so they should never get updated while queued on the worklist. |
| 276 | assert(!MemoMap.count(CurrentPad)); |
| 277 | Value *UnwindDestToken = nullptr; |
| 278 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) { |
| 279 | if (CatchSwitch->hasUnwindDest()) { |
| 280 | UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt(); |
| 281 | } else { |
| 282 | // Catchswitch doesn't have a 'nounwind' variant, and one might be |
| 283 | // annotated as "unwinds to caller" when really it's nounwind (see |
| 284 | // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the |
| 285 | // parent's unwind dest from this. We can check its catchpads' |
| 286 | // descendants, since they might include a cleanuppad with an |
| 287 | // "unwinds to caller" cleanupret, which can be trusted. |
| 288 | for (auto HI = CatchSwitch->handler_begin(), |
| 289 | HE = CatchSwitch->handler_end(); |
| 290 | HI != HE && !UnwindDestToken; ++HI) { |
| 291 | BasicBlock *HandlerBlock = *HI; |
| 292 | auto *CatchPad = |
| 293 | cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt()); |
| 294 | for (User *Child : CatchPad->users()) { |
| 295 | // Intentionally ignore invokes here -- since the catchswitch is |
| 296 | // marked "unwind to caller", it would be a verifier error if it |
| 297 | // contained an invoke which unwinds out of it, so any invoke we'd |
| 298 | // encounter must unwind to some child of the catch. |
| 299 | if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child)) |
| 300 | continue; |
| 301 | |
| 302 | Instruction *ChildPad = cast<Instruction>(Val: Child); |
| 303 | auto Memo = MemoMap.find(Val: ChildPad); |
| 304 | if (Memo == MemoMap.end()) { |
| 305 | // Haven't figured out this child pad yet; queue it. |
| 306 | Worklist.push_back(Elt: ChildPad); |
| 307 | continue; |
| 308 | } |
| 309 | // We've already checked this child, but might have found that |
| 310 | // it offers no proof either way. |
| 311 | Value *ChildUnwindDestToken = Memo->second; |
| 312 | if (!ChildUnwindDestToken) |
| 313 | continue; |
| 314 | // We already know the child's unwind dest, which can either |
| 315 | // be ConstantTokenNone to indicate unwind to caller, or can |
| 316 | // be another child of the catchpad. Only the former indicates |
| 317 | // the unwind dest of the catchswitch. |
| 318 | if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) { |
| 319 | UnwindDestToken = ChildUnwindDestToken; |
| 320 | break; |
| 321 | } |
| 322 | assert(getParentPad(ChildUnwindDestToken) == CatchPad); |
| 323 | } |
| 324 | } |
| 325 | } |
| 326 | } else { |
| 327 | auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad); |
| 328 | for (User *U : CleanupPad->users()) { |
| 329 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) { |
| 330 | if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) |
| 331 | UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt(); |
| 332 | else |
| 333 | UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext()); |
| 334 | break; |
| 335 | } |
| 336 | Value *ChildUnwindDestToken; |
| 337 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) { |
| 338 | ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt(); |
| 339 | } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) { |
| 340 | Instruction *ChildPad = cast<Instruction>(Val: U); |
| 341 | auto Memo = MemoMap.find(Val: ChildPad); |
| 342 | if (Memo == MemoMap.end()) { |
| 343 | // Haven't resolved this child yet; queue it and keep searching. |
| 344 | Worklist.push_back(Elt: ChildPad); |
| 345 | continue; |
| 346 | } |
| 347 | // We've checked this child, but still need to ignore it if it |
| 348 | // had no proof either way. |
| 349 | ChildUnwindDestToken = Memo->second; |
| 350 | if (!ChildUnwindDestToken) |
| 351 | continue; |
| 352 | } else { |
| 353 | // Not a relevant user of the cleanuppad |
| 354 | continue; |
| 355 | } |
| 356 | // In a well-formed program, the child/invoke must either unwind to |
| 357 | // an(other) child of the cleanup, or exit the cleanup. In the |
| 358 | // first case, continue searching. |
| 359 | if (isa<Instruction>(Val: ChildUnwindDestToken) && |
| 360 | getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad) |
| 361 | continue; |
| 362 | UnwindDestToken = ChildUnwindDestToken; |
| 363 | break; |
| 364 | } |
| 365 | } |
| 366 | // If we haven't found an unwind dest for CurrentPad, we may have queued its |
| 367 | // children, so move on to the next in the worklist. |
| 368 | if (!UnwindDestToken) |
| 369 | continue; |
| 370 | |
| 371 | // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits |
| 372 | // any ancestors of CurrentPad up to but not including UnwindDestToken's |
| 373 | // parent pad. Record this in the memo map, and check to see if the |
| 374 | // original EHPad being queried is one of the ones exited. |
| 375 | Value *UnwindParent; |
| 376 | if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken)) |
| 377 | UnwindParent = getParentPad(EHPad: UnwindPad); |
| 378 | else |
| 379 | UnwindParent = nullptr; |
| 380 | bool ExitedOriginalPad = false; |
| 381 | for (Instruction *ExitedPad = CurrentPad; |
| 382 | ExitedPad && ExitedPad != UnwindParent; |
| 383 | ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) { |
| 384 | // Skip over catchpads since they just follow their catchswitches. |
| 385 | if (isa<CatchPadInst>(Val: ExitedPad)) |
| 386 | continue; |
| 387 | MemoMap[ExitedPad] = UnwindDestToken; |
| 388 | ExitedOriginalPad |= (ExitedPad == EHPad); |
| 389 | } |
| 390 | |
| 391 | if (ExitedOriginalPad) |
| 392 | return UnwindDestToken; |
| 393 | |
| 394 | // Continue the search. |
| 395 | } |
| 396 | |
| 397 | // No definitive information is contained within this funclet. |
| 398 | return nullptr; |
| 399 | } |
| 400 | |
| 401 | /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, |
| 402 | /// return that pad instruction. If it unwinds to caller, return |
| 403 | /// ConstantTokenNone. If it does not have a definitive unwind destination, |
| 404 | /// return nullptr. |
| 405 | /// |
| 406 | /// This routine gets invoked for calls in funclets in inlinees when inlining |
| 407 | /// an invoke. Since many funclets don't have calls inside them, it's queried |
| 408 | /// on-demand rather than building a map of pads to unwind dests up front. |
| 409 | /// Determining a funclet's unwind dest may require recursively searching its |
| 410 | /// descendants, and also ancestors and cousins if the descendants don't provide |
| 411 | /// an answer. Since most funclets will have their unwind dest immediately |
| 412 | /// available as the unwind dest of a catchswitch or cleanupret, this routine |
| 413 | /// searches top-down from the given pad and then up. To avoid worst-case |
| 414 | /// quadratic run-time given that approach, it uses a memo map to avoid |
| 415 | /// re-processing funclet trees. The callers that rewrite the IR as they go |
| 416 | /// take advantage of this, for correctness, by checking/forcing rewritten |
| 417 | /// pads' entries to match the original callee view. |
| 418 | static Value *getUnwindDestToken(Instruction *EHPad, |
| 419 | UnwindDestMemoTy &MemoMap) { |
| 420 | // Catchpads unwind to the same place as their catchswitch; |
| 421 | // redirect any queries on catchpads so the code below can |
| 422 | // deal with just catchswitches and cleanuppads. |
| 423 | if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad)) |
| 424 | EHPad = CPI->getCatchSwitch(); |
| 425 | |
| 426 | // Check if we've already determined the unwind dest for this pad. |
| 427 | auto Memo = MemoMap.find(Val: EHPad); |
| 428 | if (Memo != MemoMap.end()) |
| 429 | return Memo->second; |
| 430 | |
| 431 | // Search EHPad and, if necessary, its descendants. |
| 432 | Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); |
| 433 | assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); |
| 434 | if (UnwindDestToken) |
| 435 | return UnwindDestToken; |
| 436 | |
| 437 | // No information is available for this EHPad from itself or any of its |
| 438 | // descendants. An unwind all the way out to a pad in the caller would |
| 439 | // need also to agree with the unwind dest of the parent funclet, so |
| 440 | // search up the chain to try to find a funclet with information. Put |
| 441 | // null entries in the memo map to avoid re-processing as we go up. |
| 442 | MemoMap[EHPad] = nullptr; |
| 443 | #ifndef NDEBUG |
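| | // Debug-only: pads that were temporarily mapped to nullptr during this query; |
| | // used by the asserts in the verification walk below. |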
| 444 | SmallPtrSet<Instruction *, 4> TempMemos; |
| 445 | TempMemos.insert(EHPad); |
| 446 | #endif |
| 447 | Instruction *LastUselessPad = EHPad; |
| 448 | Value *AncestorToken; |
| 449 | for (AncestorToken = getParentPad(EHPad); |
| 450 | auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken); |
| 451 | AncestorToken = getParentPad(EHPad: AncestorToken)) { |
| 452 | // Skip over catchpads since they just follow their catchswitches. |
| 453 | if (isa<CatchPadInst>(Val: AncestorPad)) |
| 454 | continue; |
| 455 | // If the MemoMap had an entry mapping AncestorPad to nullptr, since we |
| 456 | // haven't yet called getUnwindDestTokenHelper for AncestorPad in this |
| 457 | // call to getUnwindDestToken, that would mean that AncestorPad had no |
| 458 | // information in itself, its descendants, or its ancestors. If that |
| 459 | // were the case, then we should also have recorded the lack of information |
| 460 | // for the descendant that we're coming from. So assert that we don't |
| 461 | // find a null entry in the MemoMap for AncestorPad. |
| 462 | assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); |
| 463 | auto AncestorMemo = MemoMap.find(Val: AncestorPad); |
| 464 | if (AncestorMemo == MemoMap.end()) { |
| 465 | UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap); |
| 466 | } else { |
| 467 | UnwindDestToken = AncestorMemo->second; |
| 468 | } |
| 469 | if (UnwindDestToken) |
| 470 | break; |
| 471 | LastUselessPad = AncestorPad; |
| 472 | MemoMap[LastUselessPad] = nullptr; |
| 473 | #ifndef NDEBUG |
| 474 | TempMemos.insert(LastUselessPad); |
| 475 | #endif |
| 476 | } |
| 477 | |
| 478 | // We know that getUnwindDestTokenHelper was called on LastUselessPad and |
| 479 | // returned nullptr (and likewise for EHPad and any of its ancestors up to |
| 480 | // LastUselessPad), so LastUselessPad has no information from below. Since |
| 481 | // getUnwindDestTokenHelper must investigate all downward paths through |
| 482 | // no-information nodes to prove that a node has no information like this, |
| 483 | // and since any time it finds information it records it in the MemoMap for |
| 484 | // not just the immediately-containing funclet but also any ancestors also |
| 485 | // exited, it must be the case that, walking downward from LastUselessPad, |
| 486 | // visiting just those nodes which have not been mapped to an unwind dest |
| 487 | // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since |
| 488 | // they are just used to keep getUnwindDestTokenHelper from repeating work), |
| 489 | // any node visited must have been exhaustively searched with no information |
| 490 | // for it found. |
| 491 | SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); |
| 492 | while (!Worklist.empty()) { |
| 493 | Instruction *UselessPad = Worklist.pop_back_val(); |
| 494 | auto Memo = MemoMap.find(Val: UselessPad); |
| 495 | if (Memo != MemoMap.end() && Memo->second) { |
| 496 | // Here the name 'UselessPad' is a bit of a misnomer, because we've found |
| 497 | // that it is a funclet that does have information about unwinding to |
| 498 | // a particular destination; its parent was a useless pad. |
| 499 | // Since its parent has no information, the unwind edge must not escape |
| 500 | // the parent, and must target a sibling of this pad. This local unwind |
| 501 | // gives us no information about EHPad. Leave it and the subtree rooted |
| 502 | // at it alone. |
| 503 | assert(getParentPad(Memo->second) == getParentPad(UselessPad)); |
| 504 | continue; |
| 505 | } |
| 506 | // We know we don't have information for UselessPad. If it has an entry in |
| 507 | // the MemoMap (mapping it to nullptr), it must be one of the TempMemos |
| 508 | // added on this invocation of getUnwindDestToken; if a previous invocation |
| 509 | // recorded nullptr, it would have had to prove that the ancestors of |
| 510 | // UselessPad, which include LastUselessPad, had no information, and that |
| 511 | // in turn would have required proving that the descendants of |
| 512 | // LastUselessPad, which include EHPad, have no information about |
| 513 | // LastUselessPad, which would imply that EHPad was mapped to nullptr in |
| 514 | // the MemoMap on that invocation, which isn't the case if we got here. |
| 515 | assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); |
| 516 | // Assert as we enumerate users that 'UselessPad' doesn't have any unwind |
| 517 | // information that we'd be contradicting by making a map entry for it |
| 518 | // (which is something that getUnwindDestTokenHelper must have proved for |
| 519 | // us to get here). Just assert on its direct users here; the checks in |
| 520 | // this downward walk at its descendants will verify that they don't have |
| 521 | // any unwind edges that exit 'UselessPad' either (i.e. they either have no |
| 522 | // unwind edges or unwind to a sibling). |
| 523 | MemoMap[UselessPad] = UnwindDestToken; |
| 524 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) { |
| 525 | assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad" ); |
| 526 | for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { |
| 527 | auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt(); |
| 528 | for (User *U : CatchPad->users()) { |
| 529 | assert((!isa<InvokeInst>(U) || |
| 530 | (getParentPad(&*cast<InvokeInst>(U) |
| 531 | ->getUnwindDest() |
| 532 | ->getFirstNonPHIIt()) == CatchPad)) && |
| 533 | "Expected useless pad" ); |
| 534 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
| 535 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
| 536 | } |
| 537 | } |
| 538 | } else { |
| 539 | assert(isa<CleanupPadInst>(UselessPad)); |
| 540 | for (User *U : UselessPad->users()) { |
| 541 | assert(!isa<CleanupReturnInst>(U) && "Expected useless pad" ); |
| 542 | assert( |
| 543 | (!isa<InvokeInst>(U) || |
| 544 | (getParentPad( |
| 545 | &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) == |
| 546 | UselessPad)) && |
| 547 | "Expected useless pad" ); |
| 548 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
| 549 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
| 550 | } |
| 551 | } |
| 552 | } |
| 553 | |
| 554 | return UnwindDestToken; |
| 555 | } |
| 556 | |
| 557 | /// When we inline a basic block into an invoke, |
| 558 | /// we have to turn all of the calls that can throw into invokes. |
| 559 | /// This function analyzes BB to see if there are any such calls, and if so, |
| 560 | /// it rewrites them to be invokes whose unwind edge targets UnwindEdge; the |
| 561 | /// caller then adds the new incoming values to the unwind block's PHI nodes. |
| 562 | static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( |
| 563 | BasicBlock *BB, BasicBlock *UnwindEdge, |
| 564 | UnwindDestMemoTy *FuncletUnwindMap = nullptr) { |
| 565 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
| 566 | // We only need to check for function calls: inlined invoke |
| 567 | // instructions require no special handling. |
| 568 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
| 569 | |
| 570 | if (!CI || CI->doesNotThrow()) |
| 571 | continue; |
| 572 | |
| 573 | // We do not need to (and in fact, cannot) convert possibly throwing calls |
| 574 | // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into |
| 575 | // invokes. The caller's "segment" of the deoptimization continuation |
| 576 | // attached to the newly inlined @llvm.experimental.deoptimize |
| 577 | // (resp. @llvm.experimental.guard) call should contain the exception |
| 578 | // handling logic, if any. |
| 579 | if (auto *F = CI->getCalledFunction()) |
| 580 | if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || |
| 581 | F->getIntrinsicID() == Intrinsic::experimental_guard) |
| 582 | continue; |
| 583 | |
| 584 | if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
| 585 | // This call is nested inside a funclet. If that funclet has an unwind |
| 586 | // destination within the inlinee, then unwinding out of this call would |
| 587 | // be UB. Rewriting this call to an invoke which targets the inlined |
| 588 | // invoke's unwind dest would give the call's parent funclet multiple |
| 589 | // unwind destinations, which is something that subsequent EH table |
| 590 | // generation can't handle and that the verifier rejects. So when we |
| 591 | // see such a call, leave it as a call. |
| 592 | auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]); |
| 593 | Value *UnwindDestToken = |
| 594 | getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap); |
| 595 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
| 596 | continue; |
| 597 | #ifndef NDEBUG |
| 598 | Instruction *MemoKey; |
| 599 | if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) |
| 600 | MemoKey = CatchPad->getCatchSwitch(); |
| 601 | else |
| 602 | MemoKey = FuncletPad; |
| 603 | assert(FuncletUnwindMap->count(MemoKey) && |
| 604 | (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && |
| 605 | "must get memoized to avoid confusing later searches" ); |
| 606 | #endif // NDEBUG |
| 607 | } |
| 608 | |
| 609 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); |
| 610 | return BB; |
| 611 | } |
| 612 | return nullptr; |
| 613 | } |
| 614 | |
| 615 | /// If we inlined an invoke site, we need to convert calls |
| 616 | /// in the body of the inlined function into invokes. |
| 617 | /// |
| 618 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
| 619 | /// block of the inlined code (the last block is the end of the function), |
| 620 | /// and InlinedCodeInfo is information about the code that got inlined. |
| 621 | static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
| 622 | ClonedCodeInfo &InlinedCodeInfo) { |
| 623 | BasicBlock *InvokeDest = II->getUnwindDest(); |
| 624 | |
| 625 | Function *Caller = FirstNewBlock->getParent(); |
| 626 | |
| 627 | // The inlined code is currently at the end of the function, scan from the |
| 628 | // start of the inlined code to its end, checking for stuff we need to |
| 629 | // rewrite. |
| 630 | LandingPadInliningInfo Invoke(II); |
| 631 | |
| 632 | // Get all of the inlined landing pad instructions. |
| 633 | SmallPtrSet<LandingPadInst*, 16> InlinedLPads; |
| 634 | for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); |
| 635 | I != E; ++I) |
| 636 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator())) |
| 637 | InlinedLPads.insert(Ptr: II->getLandingPadInst()); |
| 638 | |
| 639 | // Append the clauses from the outer landing pad instruction into the inlined |
| 640 | // landing pad instructions. |
| 641 | LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); |
| 642 | for (LandingPadInst *InlinedLPad : InlinedLPads) { |
| 643 | unsigned OuterNum = OuterLPad->getNumClauses(); |
| 644 | InlinedLPad->reserveClauses(Size: OuterNum); |
| 645 | for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) |
| 646 | InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx)); |
| 647 | if (OuterLPad->isCleanup()) |
| 648 | InlinedLPad->setCleanup(true); |
| 649 | } |
| 650 | |
| 651 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
| 652 | BB != E; ++BB) { |
| 653 | if (InlinedCodeInfo.ContainsCalls) |
| 654 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
| 655 | BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest())) |
| 656 | // Update any PHI nodes in the exceptional block to indicate that there |
| 657 | // is now a new entry in them. |
| 658 | Invoke.addIncomingPHIValuesFor(BB: NewBB); |
| 659 | |
| 660 | // Forward any resumes that are remaining here. |
| 661 | if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator())) |
| 662 | Invoke.forwardResume(RI, InlinedLPads); |
| 663 | } |
| 664 | |
| 665 | // Now that everything is happy, we have one final detail. The PHI nodes in |
| 666 | // the exception destination block still have entries due to the original |
| 667 | // invoke instruction. Eliminate these entries (which might even delete the |
| 668 | // PHI node) now. |
| 669 | InvokeDest->removePredecessor(Pred: II->getParent()); |
| 670 | } |
| 671 | |
| 672 | /// If we inlined an invoke site, we need to convert calls |
| 673 | /// in the body of the inlined function into invokes. |
| 674 | /// |
| 675 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
| 676 | /// block of the inlined code (the last block is the end of the function), |
| 677 | /// and InlinedCodeInfo is information about the code that got inlined. |
| 678 | static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
| 679 | ClonedCodeInfo &InlinedCodeInfo) { |
| 680 | BasicBlock *UnwindDest = II->getUnwindDest(); |
| 681 | Function *Caller = FirstNewBlock->getParent(); |
| 682 | |
| 683 | assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!" ); |
| 684 | |
| 685 | // If there are PHI nodes in the unwind destination block, we need to keep |
| 686 | // track of which values came into them from the invoke before removing the |
| 687 | // edge from this block. |
| 688 | SmallVector<Value *, 8> UnwindDestPHIValues; |
| 689 | BasicBlock *InvokeBB = II->getParent(); |
| 690 | for (PHINode &PHI : UnwindDest->phis()) { |
| 691 | // Save the value to use for this edge. |
| 692 | UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB)); |
| 693 | } |
| 694 | |
| 695 | // Add incoming-PHI values to the unwind destination block for the given basic |
| 696 | // block, using the values for the original invoke's source block. |
| 697 | auto UpdatePHINodes = [&](BasicBlock *Src) { |
| 698 | BasicBlock::iterator I = UnwindDest->begin(); |
| 699 | for (Value *V : UnwindDestPHIValues) { |
| 700 | PHINode *PHI = cast<PHINode>(Val&: I); |
| 701 | PHI->addIncoming(V, BB: Src); |
| 702 | ++I; |
| 703 | } |
| 704 | }; |
| 705 | |
| 706 | // This connects all the instructions which 'unwind to caller' to the invoke |
| 707 | // destination. |
| 708 | UnwindDestMemoTy FuncletUnwindMap; |
| 709 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
| 710 | BB != E; ++BB) { |
| 711 | if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) { |
| 712 | if (CRI->unwindsToCaller()) { |
| 713 | auto *CleanupPad = CRI->getCleanupPad(); |
| 714 | CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator()); |
| 715 | CRI->eraseFromParent(); |
| 716 | UpdatePHINodes(&*BB); |
| 717 | // Finding a cleanupret with an unwind destination would confuse |
| 718 | // subsequent calls to getUnwindDestToken, so map the cleanuppad |
| 719 | // to short-circuit any such calls and recognize this as an "unwind |
| 720 | // to caller" cleanup. |
| 721 | assert(!FuncletUnwindMap.count(CleanupPad) || |
| 722 | isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); |
| 723 | FuncletUnwindMap[CleanupPad] = |
| 724 | ConstantTokenNone::get(Context&: Caller->getContext()); |
| 725 | } |
| 726 | } |
| 727 | |
| 728 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
| 729 | if (!I->isEHPad()) |
| 730 | continue; |
| 731 | |
| 732 | Instruction *Replacement = nullptr; |
| 733 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
| 734 | if (CatchSwitch->unwindsToCaller()) { |
| 735 | Value *UnwindDestToken; |
| 736 | if (auto *ParentPad = |
| 737 | dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) { |
| 738 | // This catchswitch is nested inside another funclet. If that |
| 739 | // funclet has an unwind destination within the inlinee, then |
| 740 | // unwinding out of this catchswitch would be UB. Rewriting this |
| 741 | // catchswitch to unwind to the inlined invoke's unwind dest would |
| 742 | // give the parent funclet multiple unwind destinations, which is |
| 743 | // something that subsequent EH table generation can't handle and |
| 744 | // that the verifier rejects. So when we see such a catchswitch, leave it |
| 745 | // as "unwind to caller". |
| 746 | UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap); |
| 747 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
| 748 | continue; |
| 749 | } else { |
| 750 | // This catchswitch has no parent to inherit constraints from, and |
| 751 | // none of its descendants can have an unwind edge that exits it and |
| 752 | // targets another funclet in the inlinee. It may or may not have a |
| 753 | // descendant that definitively has an unwind to caller. In either |
| 754 | // case, we'll have to assume that any unwinds out of it may need to |
| 755 | // be routed to the caller, so treat it as though it has a definitive |
| 756 | // unwind to caller. |
| 757 | UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext()); |
| 758 | } |
| 759 | auto *NewCatchSwitch = CatchSwitchInst::Create( |
| 760 | ParentPad: CatchSwitch->getParentPad(), UnwindDest, |
| 761 | NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(), |
| 762 | InsertBefore: CatchSwitch->getIterator()); |
| 763 | for (BasicBlock *PadBB : CatchSwitch->handlers()) |
| 764 | NewCatchSwitch->addHandler(Dest: PadBB); |
| 765 | // Propagate info for the old catchswitch over to the new one in |
| 766 | // the unwind map. This also serves to short-circuit any subsequent |
| 767 | // checks for the unwind dest of this catchswitch, which would get |
| 768 | // confused if they found the outer handler in the callee. |
| 769 | FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; |
| 770 | Replacement = NewCatchSwitch; |
| 771 | } |
| 772 | } else if (!isa<FuncletPadInst>(Val: I)) { |
| 773 | llvm_unreachable("unexpected EHPad!" ); |
| 774 | } |
| 775 | |
| 776 | if (Replacement) { |
| 777 | Replacement->takeName(V: &*I); |
| 778 | I->replaceAllUsesWith(V: Replacement); |
| 779 | I->eraseFromParent(); |
| 780 | UpdatePHINodes(&*BB); |
| 781 | } |
| 782 | } |
| 783 | |
| 784 | if (InlinedCodeInfo.ContainsCalls) |
| 785 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
| 786 | E = Caller->end(); |
| 787 | BB != E; ++BB) |
| 788 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
| 789 | BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap)) |
| 790 | // Update any PHI nodes in the exceptional block to indicate that there |
| 791 | // is now a new entry in them. |
| 792 | UpdatePHINodes(NewBB); |
| 793 | |
| 794 | // Now that everything is happy, we have one final detail. The PHI nodes in |
| 795 | // the exception destination block still have entries due to the original |
| 796 | // invoke instruction. Eliminate these entries (which might even delete the |
| 797 | // PHI node) now. |
| 798 | UnwindDest->removePredecessor(Pred: InvokeBB); |
| 799 | } |
| 800 | |
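| | // Return true if the MIB stack context and the callsite stack context have |
| | // matching stack ids up to the length of the shorter context. |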
| 801 | static bool haveCommonPrefix(MDNode *MIBStackContext, |
| 802 | MDNode *CallsiteStackContext) { |
| 803 | assert(MIBStackContext->getNumOperands() > 0 && |
| 804 | CallsiteStackContext->getNumOperands() > 0); |
| 805 | // Because of the context trimming performed during matching, the callsite |
| 806 | // context could have more stack ids than the MIB. We match up to the end of |
| 807 | // the shortest stack context. |
| 808 | for (auto MIBStackIter = MIBStackContext->op_begin(), |
| 809 | CallsiteStackIter = CallsiteStackContext->op_begin(); |
| 810 | MIBStackIter != MIBStackContext->op_end() && |
| 811 | CallsiteStackIter != CallsiteStackContext->op_end(); |
| 812 | MIBStackIter++, CallsiteStackIter++) { |
| 813 | auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter); |
| 814 | auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter); |
| 815 | assert(Val1 && Val2); |
| 816 | if (Val1->getZExtValue() != Val2->getZExtValue()) |
| 817 | return false; |
| 818 | } |
| 819 | return true; |
| 820 | } |
| 821 | |
| 822 | static void removeMemProfMetadata(CallBase *Call) { |
| 823 | Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr); |
| 824 | } |
| 825 | |
| 826 | static void removeCallsiteMetadata(CallBase *Call) { |
| 827 | Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr); |
| 828 | } |
| 829 | |
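| | // Rebuild the !memprof metadata on CI from the given list of MIB nodes. If the |
| | // rebuilt trie collapses to a single allocation type, a function attribute is |
| | // used instead and the !callsite metadata is dropped as well. |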
| 830 | static void updateMemprofMetadata(CallBase *CI, |
| 831 | const std::vector<Metadata *> &MIBList, |
| 832 | OptimizationRemarkEmitter *ORE) { |
| 833 | assert(!MIBList.empty()); |
| 834 | // Remove existing memprof, which will either be replaced or may not be needed |
| 835 | // if we are able to use a single allocation type function attribute. |
| 836 | removeMemProfMetadata(Call: CI); |
| 837 | CallStackTrie CallStack(ORE); |
| 838 | for (Metadata *MIB : MIBList) |
| 839 | CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB)); |
| 840 | bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); |
| 841 | assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); |
| 842 | if (!MemprofMDAttached) |
| 843 | // If we used a function attribute remove the callsite metadata as well. |
| 844 | removeCallsiteMetadata(Call: CI); |
| 845 | } |
| 846 | |
| 847 | // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the |
| 848 | // inlined callee body, based on the callsite metadata InlinedCallsiteMD from |
| 849 | // the call that was inlined. |
| 850 | static void propagateMemProfHelper(const CallBase *OrigCall, |
| 851 | CallBase *ClonedCall, |
| 852 | MDNode *InlinedCallsiteMD, |
| 853 | OptimizationRemarkEmitter *ORE) { |
| 854 | MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite); |
| 855 | MDNode *ClonedCallsiteMD = nullptr; |
| 856 | // Check if the call originally had callsite metadata, and update it for the |
| 857 | // new call in the inlined body. |
| 858 | if (OrigCallsiteMD) { |
| 859 | // The cloned call's context is now the concatenation of the original call's |
| 860 | // callsite metadata and the callsite metadata on the call where it was |
| 861 | // inlined. |
| 862 | ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD); |
| 863 | ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD); |
| 864 | } |
| 865 | |
| 866 | // Update any memprof metadata on the cloned call. |
| 867 | MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof); |
| 868 | if (!OrigMemProfMD) |
| 869 | return; |
| 870 | // We currently expect that allocations with memprof metadata also have |
| 871 | // callsite metadata for the allocation's part of the context. |
| 872 | assert(OrigCallsiteMD); |
| 873 | |
| 874 | // New call's MIB list. |
| 875 | std::vector<Metadata *> NewMIBList; |
| 876 | |
| 877 | // For each MIB metadata, check if its call stack context starts with the |
| 878 | // new clone's callsite metadata. If so, that MIB goes onto the cloned call in |
| 879 | // the inlined body. If not, it stays on the out-of-line original call. |
| 880 | for (auto &MIBOp : OrigMemProfMD->operands()) { |
| 881 | MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp); |
| 882 | // Stack is first operand of MIB. |
| 883 | MDNode *StackMD = getMIBStackNode(MIB); |
| 884 | assert(StackMD); |
| 885 | // See if the new cloned callsite context matches this profiled context. |
| 886 | if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD)) |
| 887 | // Add it to the cloned call's MIB list. |
| 888 | NewMIBList.push_back(x: MIB); |
| 889 | } |
| 890 | if (NewMIBList.empty()) { |
| 891 | removeMemProfMetadata(Call: ClonedCall); |
| 892 | removeCallsiteMetadata(Call: ClonedCall); |
| 893 | return; |
| 894 | } |
| 895 | if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) |
| 896 | updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE); |
| 897 | } |
| 898 | |
| 899 | // Update memprof related metadata (!memprof and !callsite) based on the |
| 900 | // inlining of Callee into the callsite at CB. The updates include merging the |
| 901 | // inlined callee's callsite metadata with that of the inlined call, |
| 902 | // and moving the subset of any memprof contexts to the inlined callee |
| 903 | // allocations if they match the new inlined call stack. |
| 904 | static void |
| 905 | propagateMemProfMetadata(Function *Callee, CallBase &CB, |
| 906 | bool ContainsMemProfMetadata, |
| 907 | const ValueMap<const Value *, WeakTrackingVH> &VMap, |
| 908 | OptimizationRemarkEmitter *ORE) { |
| 909 | MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite); |
| 910 | // Only need to update if the inlined callsite had callsite metadata, or if |
| 911 | // there was any memprof metadata inlined. |
| 912 | if (!CallsiteMD && !ContainsMemProfMetadata) |
| 913 | return; |
| 914 | |
| 915 | // Propagate metadata onto the cloned calls in the inlined callee. |
| 916 | for (const auto &Entry : VMap) { |
| 917 | // See if this is a call that has been inlined and remapped, and not |
| 918 | // simplified away in the process. |
| 919 | auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first); |
| 920 | auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second); |
| 921 | if (!OrigCall || !ClonedCall) |
| 922 | continue; |
| 923 | // If the inlined callsite did not have any callsite metadata, then it isn't |
| 924 | // involved in any profiled call contexts, and we can remove any memprof |
| 925 | // metadata on the cloned call. |
| 926 | if (!CallsiteMD) { |
| 927 | removeMemProfMetadata(Call: ClonedCall); |
| 928 | removeCallsiteMetadata(Call: ClonedCall); |
| 929 | continue; |
| 930 | } |
| 931 | propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE); |
| 932 | } |
| 933 | } |
| 934 | |
| 935 | /// When inlining a call site that has !llvm.mem.parallel_loop_access, |
| 936 | /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should |
| 937 | /// be propagated to all memory-accessing cloned instructions. |
| 938 | static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, |
| 939 | Function::iterator FEnd) { |
| 940 | MDNode *MemParallelLoopAccess = |
| 941 | CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
| 942 | MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group); |
| 943 | MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope); |
| 944 | MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias); |
| 945 | if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) |
| 946 | return; |
| 947 | |
| 948 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
| 949 | for (Instruction &I : BB) { |
| 950 | // This metadata is only relevant for instructions that access memory. |
| 951 | if (!I.mayReadOrWriteMemory()) |
| 952 | continue; |
| 953 | |
| 954 | if (MemParallelLoopAccess) { |
| 955 | // TODO: This probably should not overwrite MemParallelLoopAccess. |
| 956 | MemParallelLoopAccess = MDNode::concatenate( |
| 957 | A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access), |
| 958 | B: MemParallelLoopAccess); |
| 959 | I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, |
| 960 | Node: MemParallelLoopAccess); |
| 961 | } |
| 962 | |
| 963 | if (AccessGroup) |
| 964 | I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups( |
| 965 | AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup)); |
| 966 | |
| 967 | if (AliasScope) |
| 968 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate( |
| 969 | A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope)); |
| 970 | |
| 971 | if (NoAlias) |
| 972 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate( |
| 973 | A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias)); |
| 974 | } |
| 975 | } |
| 976 | } |
| 977 | |
| 978 | /// Bundle operands of the inlined function must be added to inlined call sites. |
| 979 | static void PropagateOperandBundles(Function::iterator InlinedBB, |
| 980 | Instruction *CallSiteEHPad) { |
| 981 | for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) { |
| 982 | CallBase *I = dyn_cast<CallBase>(Val: &II); |
| 983 | if (!I) |
| 984 | continue; |
| 985 | // Skip call sites which already have a "funclet" bundle. |
| 986 | if (I->getOperandBundle(ID: LLVMContext::OB_funclet)) |
| 987 | continue; |
| 988 | // Skip call sites which are nounwind intrinsics (as long as they don't |
| 989 | // lower into regular function calls in the course of IR transformations). |
| 990 | auto *CalledFn = |
| 991 | dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts()); |
| 992 | if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && |
| 993 | !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID())) |
| 994 | continue; |
| 995 | |
| 996 | SmallVector<OperandBundleDef, 1> OpBundles; |
| 997 | I->getOperandBundlesAsDefs(Defs&: OpBundles); |
| 998 | OpBundles.emplace_back(Args: "funclet" , Args&: CallSiteEHPad); |
| 999 | |
| 1000 | Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator()); |
| 1001 | NewInst->takeName(V: I); |
| 1002 | I->replaceAllUsesWith(V: NewInst); |
| 1003 | I->eraseFromParent(); |
| 1004 | } |
| 1005 | } |
| 1006 | |
| 1007 | namespace { |
| 1008 | /// Utility for cloning !noalias and !alias.scope metadata. When a code region |
| 1009 | /// using scoped alias metadata is inlined, the aliasing relationships may not |
| 1010 | /// hold between the two versions. It is necessary to create a deep clone of the |
| 1011 | /// metadata, putting the two versions in separate scope domains. |
| 1012 | class ScopedAliasMetadataDeepCloner { |
| 1013 | using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; |
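| | /// All scoped alias metadata nodes used by the callee. |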
| 1014 | SetVector<const MDNode *> MD; |
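| | /// Mapping from each original scope node to its clone (a temporary node until |
| | /// clone() builds the final replacement). |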
| 1015 | MetadataMap MDMap; |
| 1016 | void addRecursiveMetadataUses(); |
| 1017 | |
| 1018 | public: |
| 1019 | ScopedAliasMetadataDeepCloner(const Function *F); |
| 1020 | |
| 1021 | /// Create a new clone of the scoped alias metadata, which will be used by |
| 1022 | /// subsequent remap() calls. |
| 1023 | void clone(); |
| 1024 | |
| 1025 | /// Remap instructions in the given range from the original to the cloned |
| 1026 | /// metadata. |
| 1027 | void remap(Function::iterator FStart, Function::iterator FEnd); |
| 1028 | }; |
| 1029 | } // namespace |
| 1030 | |
| 1031 | ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( |
| 1032 | const Function *F) { |
| 1033 | for (const BasicBlock &BB : *F) { |
| 1034 | for (const Instruction &I : BB) { |
| 1035 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
| 1036 | MD.insert(X: M); |
| 1037 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
| 1038 | MD.insert(X: M); |
| 1039 | |
| 1040 | // We also need to clone the metadata in noalias intrinsics. |
| 1041 | if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
| 1042 | MD.insert(X: Decl->getScopeList()); |
| 1043 | } |
| 1044 | } |
| 1045 | addRecursiveMetadataUses(); |
| 1046 | } |
| 1047 | |
| 1048 | void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { |
| 1049 | SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); |
| 1050 | while (!Queue.empty()) { |
| 1051 | const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val()); |
| 1052 | for (const Metadata *Op : M->operands()) |
| 1053 | if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op)) |
| 1054 | if (MD.insert(X: OpMD)) |
| 1055 | Queue.push_back(Elt: OpMD); |
| 1056 | } |
| 1057 | } |
| 1058 | |
| 1059 | void ScopedAliasMetadataDeepCloner::clone() { |
| 1060 | assert(MDMap.empty() && "clone() already called ?" ); |
| 1061 | |
| 1062 | SmallVector<TempMDTuple, 16> DummyNodes; |
| 1063 | for (const MDNode *I : MD) { |
| 1064 | DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {})); |
| 1065 | MDMap[I].reset(MD: DummyNodes.back().get()); |
| 1066 | } |
| 1067 | |
| 1068 | // Create new metadata nodes to replace the dummy nodes, replacing old |
| 1069 | // metadata references with either a dummy node or an already-created new |
| 1070 | // node. |
| 1071 | SmallVector<Metadata *, 4> NewOps; |
| 1072 | for (const MDNode *I : MD) { |
| 1073 | for (const Metadata *Op : I->operands()) { |
| 1074 | if (const MDNode *M = dyn_cast<MDNode>(Val: Op)) |
| 1075 | NewOps.push_back(Elt: MDMap[M]); |
| 1076 | else |
| 1077 | NewOps.push_back(Elt: const_cast<Metadata *>(Op)); |
| 1078 | } |
| 1079 | |
| 1080 | MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps); |
| 1081 | MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]); |
| 1082 | assert(TempM->isTemporary() && "Expected temporary node" ); |
| 1083 | |
| 1084 | TempM->replaceAllUsesWith(MD: NewM); |
| 1085 | NewOps.clear(); |
| 1086 | } |
| 1087 | } |
| 1088 | |
| 1089 | void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, |
| 1090 | Function::iterator FEnd) { |
| 1091 | if (MDMap.empty()) |
| 1092 | return; // Nothing to do. |
| 1093 | |
| 1094 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
| 1095 | for (Instruction &I : BB) { |
| 1096 | // TODO: The null checks for the MDMap.lookup() results should no longer |
| 1097 | // be necessary. |
| 1098 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
| 1099 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
| 1100 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew); |
| 1101 | |
| 1102 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
| 1103 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
| 1104 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew); |
| 1105 | |
| 1106 | if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
| 1107 | if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList())) |
| 1108 | Decl->setScopeList(MNew); |
| 1109 | } |
| 1110 | } |
| 1111 | } |
| 1112 | |
| 1113 | /// If the inlined function has noalias arguments, |
| 1114 | /// then add new alias scopes for each noalias argument, tag the mapped noalias |
| 1115 | /// parameters with noalias metadata specifying the new scope, and tag all |
| 1116 | /// non-derived loads, stores and memory intrinsics with the new alias scopes. |
| 1117 | static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, |
| 1118 | const DataLayout &DL, AAResults *CalleeAAR, |
| 1119 | ClonedCodeInfo &InlinedFunctionInfo) { |
| 1120 | if (!EnableNoAliasConversion) |
| 1121 | return; |
| 1122 | |
| 1123 | const Function *CalledFunc = CB.getCalledFunction(); |
| 1124 | SmallVector<const Argument *, 4> NoAliasArgs; |
| 1125 | |
| 1126 | for (const Argument &Arg : CalledFunc->args()) |
| 1127 | if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty()) |
| 1128 | NoAliasArgs.push_back(Elt: &Arg); |
| 1129 | |
| 1130 | if (NoAliasArgs.empty()) |
| 1131 | return; |
| 1132 | |
| 1133 | // To do a good job, if a noalias variable is captured, we need to know if |
| 1134 | // the capture point dominates the particular use we're considering. |
| 1135 | DominatorTree DT; |
| 1136 | DT.recalculate(Func&: const_cast<Function&>(*CalledFunc)); |
| 1137 | |
| 1138 | // noalias indicates that pointer values based on the argument do not alias |
| 1139 | // pointer values which are not based on it. So we add a new "scope" for each |
| 1140 | // noalias function argument. Accesses using pointers based on that argument |
| 1141 | // become part of that alias scope, accesses using pointers not based on that |
| 1142 | // argument are tagged as noalias with that scope. |
| 1143 | |
| 1144 | DenseMap<const Argument *, MDNode *> NewScopes; |
| 1145 | MDBuilder MDB(CalledFunc->getContext()); |
| 1146 | |
| 1147 | // Create a new scope domain for this function. |
| 1148 | MDNode *NewDomain = |
| 1149 | MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName()); |
| 1150 | for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { |
| 1151 | const Argument *A = NoAliasArgs[i]; |
| 1152 | |
| 1153 | std::string Name = std::string(CalledFunc->getName()); |
| 1154 | if (A->hasName()) { |
| 1155 | Name += ": %" ; |
| 1156 | Name += A->getName(); |
| 1157 | } else { |
| 1158 | Name += ": argument " ; |
| 1159 | Name += utostr(X: i); |
| 1160 | } |
| 1161 | |
| 1162 | // Note: We always create a new anonymous root here. This is true regardless |
| 1163 | // of the linkage of the callee because the aliasing "scope" is not just a |
| 1164 | // property of the callee, but also of all control dependencies in the caller. |
| 1165 | MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name); |
| 1166 | NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope)); |
| 1167 | |
| 1168 | if (UseNoAliasIntrinsic) { |
| 1169 | // Introduce a llvm.experimental.noalias.scope.decl for the noalias |
| 1170 | // argument. |
| 1171 | MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope); |
| 1172 | auto *NoAliasDecl = |
| 1173 | IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList); |
| 1174 | // Ignore the result for now. The result will be used when the |
| 1175 | // llvm.noalias intrinsic is introduced. |
| 1176 | (void)NoAliasDecl; |
| 1177 | } |
| 1178 | } |
| 1179 | |
| 1180 | // Iterate over all new instructions in the map; for all memory-access |
| 1181 | // instructions, add the alias scope metadata. |
| 1182 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
| 1183 | VMI != VMIE; ++VMI) { |
| 1184 | if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) { |
| 1185 | if (!VMI->second) |
| 1186 | continue; |
| 1187 | |
| 1188 | Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second); |
| 1189 | if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI)) |
| 1190 | continue; |
| 1191 | |
| 1192 | bool IsArgMemOnlyCall = false, IsFuncCall = false; |
| 1193 | SmallVector<const Value *, 2> PtrArgs; |
| 1194 | |
| 1195 | if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I)) |
| 1196 | PtrArgs.push_back(Elt: LI->getPointerOperand()); |
| 1197 | else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I)) |
| 1198 | PtrArgs.push_back(Elt: SI->getPointerOperand()); |
| 1199 | else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I)) |
| 1200 | PtrArgs.push_back(Elt: VAAI->getPointerOperand()); |
| 1201 | else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I)) |
| 1202 | PtrArgs.push_back(Elt: CXI->getPointerOperand()); |
| 1203 | else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I)) |
| 1204 | PtrArgs.push_back(Elt: RMWI->getPointerOperand()); |
| 1205 | else if (const auto *Call = dyn_cast<CallBase>(Val: I)) { |
| 1206 | // If we know that the call does not access memory, then we'll still |
| 1207 | // know that about the inlined clone of this call site, and we don't |
| 1208 | // need to add metadata. |
| 1209 | if (Call->doesNotAccessMemory()) |
| 1210 | continue; |
| 1211 | |
| 1212 | IsFuncCall = true; |
| 1213 | if (CalleeAAR) { |
| 1214 | MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); |
| 1215 | |
| 1216 | // We'll retain this knowledge without additional metadata. |
| 1217 | if (ME.onlyAccessesInaccessibleMem()) |
| 1218 | continue; |
| 1219 | |
| 1220 | if (ME.onlyAccessesArgPointees()) |
| 1221 | IsArgMemOnlyCall = true; |
| 1222 | } |
| 1223 | |
| 1224 | for (Value *Arg : Call->args()) { |
| 1225 | // Only care about pointer arguments. If a noalias argument is |
| 1226 | // accessed through a non-pointer argument, it must be captured |
| 1227 | // first (e.g. via ptrtoint), and we protect against captures below. |
| 1228 | if (!Arg->getType()->isPointerTy()) |
| 1229 | continue; |
| 1230 | |
| 1231 | PtrArgs.push_back(Elt: Arg); |
| 1232 | } |
| 1233 | } |
| 1234 | |
| 1235 | // If we found no pointers, then this instruction is not suitable for |
| 1236 | // pairing with an instruction to receive aliasing metadata. |
| 1237 | // However, if this is a call, we might simply alias with none of the |
| 1238 | // noalias arguments. |
| 1239 | if (PtrArgs.empty() && !IsFuncCall) |
| 1240 | continue; |
| 1241 | |
| 1242 | // It is possible that there is only one underlying object, but you |
| 1243 | // need to go through several PHIs to see it, and thus it could be |
| 1244 | // repeated in the Objects list. |
| 1245 | SmallPtrSet<const Value *, 4> ObjSet; |
| 1246 | SmallVector<Metadata *, 4> Scopes, NoAliases; |
| 1247 | |
| 1248 | for (const Value *V : PtrArgs) { |
| 1249 | SmallVector<const Value *, 4> Objects; |
| 1250 | getUnderlyingObjects(V, Objects, /* LI = */ nullptr); |
| 1251 | |
| 1252 | ObjSet.insert_range(R&: Objects); |
| 1253 | } |
| 1254 | |
| 1255 | // Figure out if we're derived from anything that is not a noalias |
| 1256 | // argument. |
| 1257 | bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, |
| 1258 | UsesUnknownObject = false; |
| 1259 | for (const Value *V : ObjSet) { |
| 1260 | // Is this value a constant that cannot be derived from any pointer |
| 1261 | // value (we need to exclude constant expressions, for example, that |
| 1262 | // are formed from arithmetic on global symbols). |
| 1263 | bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || |
| 1264 | isa<ConstantPointerNull>(Val: V) || |
| 1265 | isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V); |
| 1266 | if (IsNonPtrConst) |
| 1267 | continue; |
| 1268 | |
| 1269 | // If this is anything other than a noalias argument, then we cannot |
| 1270 | // completely describe the aliasing properties using alias.scope |
| 1271 | // metadata (and, thus, won't add any). |
| 1272 | if (const Argument *A = dyn_cast<Argument>(Val: V)) { |
| 1273 | if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias)) |
| 1274 | UsesAliasingPtr = true; |
| 1275 | } else { |
| 1276 | UsesAliasingPtr = true; |
| 1277 | } |
| 1278 | |
| 1279 | if (isEscapeSource(V)) { |
| 1280 | // An escape source can only alias with a noalias argument if it has |
| 1281 | // been captured beforehand. |
| 1282 | RequiresNoCaptureBefore = true; |
| 1283 | } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) { |
| 1284 | // If this is neither an escape source, nor some identified object |
| 1285 | // (which cannot directly alias a noalias argument), nor some other |
| 1286 | // argument (which, by definition, also cannot alias a noalias |
| 1287 | // argument), conservatively do not make any assumptions. |
| 1288 | UsesUnknownObject = true; |
| 1289 | } |
| 1290 | } |
| 1291 | |
| 1292 | // Nothing we can do if the used underlying object cannot be reliably |
| 1293 | // determined. |
| 1294 | if (UsesUnknownObject) |
| 1295 | continue; |
| 1296 | |
| 1297 | // A function call can always get captured noalias pointers (via other |
| 1298 | // parameters, globals, etc.). |
| 1299 | if (IsFuncCall && !IsArgMemOnlyCall) |
| 1300 | RequiresNoCaptureBefore = true; |
| 1301 | |
| 1302 | // First, we want to figure out all of the sets with which we definitely |
| 1303 | // don't alias. Iterate over all noalias sets, and add those for which: |
| 1304 | // 1. The noalias argument is not in the set of objects from which we |
| 1305 | // definitely derive. |
| 1306 | // 2. The noalias argument has not yet been captured. |
| 1307 | // An arbitrary function that might load pointers could see captured |
| 1308 | // noalias arguments via other noalias arguments or globals, and so we |
| 1309 | // must always check for prior capture. |
| 1310 | for (const Argument *A : NoAliasArgs) { |
| 1311 | if (ObjSet.contains(Ptr: A)) |
| 1312 | continue; // May be based on a noalias argument. |
| 1313 | |
| 1314 | // It might be tempting to skip the PointerMayBeCapturedBefore check if |
| 1315 | // A->hasNoCaptureAttr() is true, but this is incorrect because |
| 1316 | // nocapture only guarantees that no copies outlive the function, not |
| 1317 | // that the value cannot be locally captured. |
| 1318 | if (!RequiresNoCaptureBefore || |
| 1319 | !capturesAnything(CC: PointerMayBeCapturedBefore( |
| 1320 | V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false, |
| 1321 | Mask: CaptureComponents::Provenance))) |
| 1322 | NoAliases.push_back(Elt: NewScopes[A]); |
| 1323 | } |
| 1324 | |
| 1325 | if (!NoAliases.empty()) |
| 1326 | NI->setMetadata(KindID: LLVMContext::MD_noalias, |
| 1327 | Node: MDNode::concatenate( |
| 1328 | A: NI->getMetadata(KindID: LLVMContext::MD_noalias), |
| 1329 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases))); |
| 1330 | |
| 1331 | // Next, we want to figure out all of the sets to which we might belong. |
| 1332 | // We might belong to a set if the noalias argument is in the set of |
| 1333 | // underlying objects. If there is some non-noalias argument in our list |
| 1334 | // of underlying objects, then we cannot add a scope because the fact |
| 1335 | // that some access does not alias with any set of our noalias arguments |
| 1336 | // cannot itself guarantee that it does not alias with this access |
| 1337 | // (because there is some pointer of unknown origin involved and the |
| 1338 | // other access might also depend on this pointer). We also cannot add |
| 1339 | // scopes to arbitrary functions unless we know they don't access any |
| 1340 | // non-parameter pointer-values. |
| 1341 | bool CanAddScopes = !UsesAliasingPtr; |
| 1342 | if (CanAddScopes && IsFuncCall) |
| 1343 | CanAddScopes = IsArgMemOnlyCall; |
| 1344 | |
| 1345 | if (CanAddScopes) |
| 1346 | for (const Argument *A : NoAliasArgs) { |
| 1347 | if (ObjSet.count(Ptr: A)) |
| 1348 | Scopes.push_back(Elt: NewScopes[A]); |
| 1349 | } |
| 1350 | |
| 1351 | if (!Scopes.empty()) |
| 1352 | NI->setMetadata( |
| 1353 | KindID: LLVMContext::MD_alias_scope, |
| 1354 | Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope), |
| 1355 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes))); |
| 1356 | } |
| 1357 | } |
| 1358 | } |
| 1359 | |
| 1360 | static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, |
| 1361 | ReturnInst *End) { |
| 1362 | |
| 1363 | assert(Begin->getParent() == End->getParent() && |
| 1364 | "Expected to be in same basic block!" ); |
| 1365 | auto BeginIt = Begin->getIterator(); |
| 1366 | assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator" ); |
| 1367 | return !llvm::isGuaranteedToTransferExecutionToSuccessor( |
| 1368 | Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1); |
| 1369 | } |
| 1370 | |
| 1371 | // Add attributes from CB params and Fn attributes that can always be propagated |
| 1372 | // to the corresponding argument / inner callbases. |
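| | // |
| | // Illustrative sketch (hypothetical callees): given a call site |
| | //   call void @callee(ptr readonly dereferenceable(8) %p) |
| | // an inner call inside @callee that forwards the parameter unchanged, e.g. |
| | //   call void @inner(ptr %arg) |
| | // can have readonly and dereferenceable(8) added to its cloned counterpart in |
| | // the caller, subject to the byval/constant/conflict checks performed below. |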
| 1373 | static void AddParamAndFnBasicAttributes(const CallBase &CB, |
| 1374 | ValueToValueMapTy &VMap, |
| 1375 | ClonedCodeInfo &InlinedFunctionInfo) { |
| 1376 | auto *CalledFunction = CB.getCalledFunction(); |
| 1377 | auto &Context = CalledFunction->getContext(); |
| 1378 | |
| 1379 | // Collect valid attributes for all params. |
| 1380 | SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs; |
| 1381 | bool HasAttrToPropagate = false; |
| 1382 | |
| 1383 | // Attributes we can only propagate if the exact parameter is forwarded. |
| 1384 | // We can propagate both poison generating and UB generating attributes |
| 1385 | // without any extra checks. The only attribute that is tricky to propagate |
| 1386 | // is `noundef` (skipped for now) as that can create new UB where previous |
| 1387 | // behavior was just using a poison value. |
| 1388 | static const Attribute::AttrKind ExactAttrsToPropagate[] = { |
| 1389 | Attribute::Dereferenceable, Attribute::DereferenceableOrNull, |
| 1390 | Attribute::NonNull, Attribute::NoFPClass, |
| 1391 | Attribute::Alignment, Attribute::Range}; |
| 1392 | |
| 1393 | for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) { |
| 1394 | ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
| 1395 | ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
| 1396 | // Access attributes can be propagated to any param with the same underlying |
| 1397 | // object as the argument. |
| 1398 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
| 1399 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone); |
| 1400 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly)) |
| 1401 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly); |
| 1402 | |
| 1403 | for (Attribute::AttrKind AK : ExactAttrsToPropagate) { |
| 1404 | Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK); |
| 1405 | if (Attr.isValid()) |
| 1406 | ValidExactParamAttrs.back().addAttribute(A: Attr); |
| 1407 | } |
| 1408 | |
| 1409 | HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes(); |
| 1410 | HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes(); |
| 1411 | } |
| 1412 | |
| 1413 | // Won't be able to propagate anything. |
| 1414 | if (!HasAttrToPropagate) |
| 1415 | return; |
| 1416 | |
| 1417 | for (BasicBlock &BB : *CalledFunction) { |
| 1418 | for (Instruction &Ins : BB) { |
| 1419 | const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins); |
| 1420 | if (!InnerCB) |
| 1421 | continue; |
| 1422 | auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB)); |
| 1423 | if (!NewInnerCB) |
| 1424 | continue; |
| 1425 | // The InnerCB might have been simplified during the inlining |
| 1426 | // process, which can make propagation incorrect. |
| 1427 | if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB)) |
| 1428 | continue; |
| 1429 | |
| 1430 | AttributeList AL = NewInnerCB->getAttributes(); |
| 1431 | for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { |
| 1432 | // It's unsound or requires special handling to propagate |
| 1433 | // attributes to byval arguments. Even if CalledFunction |
| 1434 | // doesn't e.g. write to the argument (readonly), the call to |
| 1435 | // NewInnerCB may write to its by-value copy. |
| 1436 | if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal)) |
| 1437 | continue; |
| 1438 | |
| 1439 | // Don't bother propagating attrs to constants. |
| 1440 | if (match(V: NewInnerCB->getArgOperand(i: I), |
| 1441 | P: llvm::PatternMatch::m_ImmConstant())) |
| 1442 | continue; |
| 1443 | |
| 1444 | // Check if the underlying value for the parameter is an argument. |
| 1445 | const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I)); |
| 1446 | unsigned ArgNo; |
| 1447 | if (Arg) { |
| 1448 | ArgNo = Arg->getArgNo(); |
| 1449 | // For dereferenceable, dereferenceable_or_null, align, etc... |
| 1450 | // we don't want to propagate if the existing param has the same |
| 1451 | // attribute with "better" constraints. So remove from the |
| 1452 | // new AL if the region of the existing param is larger than |
| 1453 | // what we can propagate. |
| 1454 | AttrBuilder NewAB{ |
| 1455 | Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])}; |
| 1456 | if (AL.getParamDereferenceableBytes(Index: I) > |
| 1457 | NewAB.getDereferenceableBytes()) |
| 1458 | NewAB.removeAttribute(Val: Attribute::Dereferenceable); |
| 1459 | if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) > |
| 1460 | NewAB.getDereferenceableOrNullBytes()) |
| 1461 | NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
| 1462 | if (AL.getParamAlignment(ArgNo: I).valueOrOne() > |
| 1463 | NewAB.getAlignment().valueOrOne()) |
| 1464 | NewAB.removeAttribute(Val: Attribute::Alignment); |
| 1465 | if (auto ExistingRange = AL.getParamRange(ArgNo: I)) { |
| 1466 | if (auto NewRange = NewAB.getRange()) { |
| 1467 | ConstantRange CombinedRange = |
| 1468 | ExistingRange->intersectWith(CR: *NewRange); |
| 1469 | NewAB.removeAttribute(Val: Attribute::Range); |
| 1470 | NewAB.addRangeAttr(CR: CombinedRange); |
| 1471 | } |
| 1472 | } |
| 1473 | |
| 1474 | if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I)) |
| 1475 | NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass()); |
| 1476 | |
| 1477 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB); |
| 1478 | } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) { |
| 1479 | // Check if the underlying value for the parameter is an argument. |
| 1480 | const Value *UnderlyingV = |
| 1481 | getUnderlyingObject(V: InnerCB->getArgOperand(i: I)); |
| 1482 | Arg = dyn_cast<Argument>(Val: UnderlyingV); |
| 1483 | if (!Arg) |
| 1484 | continue; |
| 1485 | ArgNo = Arg->getArgNo(); |
| 1486 | } else { |
| 1487 | continue; |
| 1488 | } |
| 1489 | |
| 1490 | // If so, propagate its access attributes. |
| 1491 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]); |
| 1492 | |
| 1493 | // We can have conflicting attributes from the inner callsite and |
| 1494 | // to-be-inlined callsite. In that case, choose the most |
| 1495 | // restrictive. |
| 1496 | |
| 1497 | // readonly + writeonly means we can never deref so make readnone. |
| 1498 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) && |
| 1499 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly)) |
| 1500 | AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone); |
| 1501 | |
| 1502 | // If we have readnone, we need to clear readonly/writeonly. |
| 1503 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) { |
| 1504 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly); |
| 1505 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly); |
| 1506 | } |
| 1507 | |
| 1508 | // Writable cannot exist in conjunction w/ readonly/readnone |
| 1509 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) || |
| 1510 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
| 1511 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable); |
| 1512 | } |
| 1513 | NewInnerCB->setAttributes(AL); |
| 1514 | } |
| 1515 | } |
| 1516 | } |
| 1517 | |
| 1518 | // Only allow these whitelisted attributes to be propagated back to the |
| 1519 | // callee. This is because other attributes may only be valid on the call |
| 1520 | // itself, i.e. attributes such as signext and zeroext. |
| 1521 | |
| 1522 | // Attributes that are always okay to propagate, because violating them is |
| 1523 | // immediate UB. |
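| | // For instance (illustrative), a call site declared as |
| | //   call noalias noundef dereferenceable(16) ptr @f() |
| | // yields a builder holding noalias, noundef, and dereferenceable(16). |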
| 1524 | static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { |
| 1525 | AttrBuilder Valid(CB.getContext()); |
| 1526 | if (auto DerefBytes = CB.getRetDereferenceableBytes()) |
| 1527 | Valid.addDereferenceableAttr(Bytes: DerefBytes); |
| 1528 | if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes()) |
| 1529 | Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes); |
| 1530 | if (CB.hasRetAttr(Kind: Attribute::NoAlias)) |
| 1531 | Valid.addAttribute(Val: Attribute::NoAlias); |
| 1532 | if (CB.hasRetAttr(Kind: Attribute::NoUndef)) |
| 1533 | Valid.addAttribute(Val: Attribute::NoUndef); |
| 1534 | return Valid; |
| 1535 | } |
| 1536 | |
| 1537 | // Attributes that need additional checks as propagating them may change |
| 1538 | // behavior or cause new UB. |
| 1539 | static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { |
| 1540 | AttrBuilder Valid(CB.getContext()); |
| 1541 | if (CB.hasRetAttr(Kind: Attribute::NonNull)) |
| 1542 | Valid.addAttribute(Val: Attribute::NonNull); |
| 1543 | if (CB.hasRetAttr(Kind: Attribute::Alignment)) |
| 1544 | Valid.addAlignmentAttr(Align: CB.getRetAlign()); |
| 1545 | if (std::optional<ConstantRange> Range = CB.getRange()) |
| 1546 | Valid.addRangeAttr(CR: *Range); |
| 1547 | return Valid; |
| 1548 | } |
| 1549 | |
| 1550 | static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap, |
| 1551 | ClonedCodeInfo &InlinedFunctionInfo) { |
| 1552 | AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); |
| 1553 | AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); |
| 1554 | if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) |
| 1555 | return; |
| 1556 | auto *CalledFunction = CB.getCalledFunction(); |
| 1557 | auto &Context = CalledFunction->getContext(); |
| 1558 | |
| 1559 | for (auto &BB : *CalledFunction) { |
| 1560 | auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()); |
| 1561 | if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0))) |
| 1562 | continue; |
| 1563 | auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0)); |
| 1564 | // Check that the cloned RetVal exists and is a call, otherwise we cannot |
| 1565 | // add the attributes on the cloned RetVal. Simplification during inlining |
| 1566 | // could have transformed the cloned instruction. |
| 1567 | auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal)); |
| 1568 | if (!NewRetVal) |
| 1569 | continue; |
| 1570 | |
| 1571 | // The RetVal might have been simplified during the inlining |
| 1572 | // process, which can make propagation incorrect. |
| 1573 | if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal)) |
| 1574 | continue; |
| 1575 | // Backward propagation of attributes to the returned value may be incorrect |
| 1576 | // if it is control flow dependent. |
| 1577 | // Consider: |
| 1578 | // @callee { |
| 1579 | // %rv = call @foo() |
| 1580 | // %rv2 = call @bar() |
| 1581 | // if (%rv2 != null) |
| 1582 | // return %rv2 |
| 1583 | // if (%rv == null) |
| 1584 | // exit() |
| 1585 | // return %rv |
| 1586 | // } |
| 1587 | // caller() { |
| 1588 | // %val = call nonnull @callee() |
| 1589 | // } |
| 1590 | // Here we cannot add the nonnull attribute on either foo or bar. So, we |
| 1591 | // limit the check to the case where both RetVal and RI are in the same basic |
| 1592 | // block and there are no throwing/exiting instructions between them. |
| 1593 | if (RI->getParent() != RetVal->getParent() || |
| 1594 | MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI)) |
| 1595 | continue; |
| 1596 | // Add to the existing attributes of NewRetVal, i.e. the cloned call |
| 1597 | // instruction. |
| 1598 | // NB! When we have the same attribute already existing on NewRetVal, but |
| 1599 | // with a differing value, the AttributeList's merge API honours the already |
| 1600 | // existing attribute value (i.e. attributes such as dereferenceable, |
| 1601 | // dereferenceable_or_null etc). See AttrBuilder::merge for more details. |
| 1602 | AttributeList AL = NewRetVal->getAttributes(); |
| 1603 | if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) |
| 1604 | ValidUB.removeAttribute(Val: Attribute::Dereferenceable); |
| 1605 | if (ValidUB.getDereferenceableOrNullBytes() < |
| 1606 | AL.getRetDereferenceableOrNullBytes()) |
| 1607 | ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
| 1608 | AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB); |
| 1609 | // Attributes that may generate poison returns are a bit tricky. If we |
| 1610 | // propagate them, other uses of the callsite might have their behavior |
| 1611 | // change or cause UB (if they have noundef) because of the new potential |
| 1612 | // poison. |
| 1613 | // Take the following three cases: |
| 1614 | // |
| 1615 | // 1) |
| 1616 | // define nonnull ptr @foo() { |
| 1617 | // %p = call ptr @bar() |
| 1618 | // call void @use(ptr %p) willreturn nounwind |
| 1619 | // ret ptr %p |
| 1620 | // } |
| 1621 | // |
| 1622 | // 2) |
| 1623 | // define noundef nonnull ptr @foo() { |
| 1624 | // %p = call ptr @bar() |
| 1625 | // call void @use(ptr %p) willreturn nounwind |
| 1626 | // ret ptr %p |
| 1627 | // } |
| 1628 | // |
| 1629 | // 3) |
| 1630 | // define nonnull ptr @foo() { |
| 1631 | // %p = call noundef ptr @bar() |
| 1632 | // ret ptr %p |
| 1633 | // } |
| 1634 | // |
| 1635 | // In case 1, we can't propagate nonnull because poison value in @use may |
| 1636 | // change behavior or trigger UB. |
| 1637 | // In case 2, we don't need to be concerned about propagating nonnull, as |
| 1638 | // any new poison at @use will trigger UB anyways. |
| 1639 | // In case 3, we can never propagate nonnull because it may create UB due to |
| 1640 | // the noundef on @bar. |
| 1641 | if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) |
| 1642 | ValidPG.removeAttribute(Val: Attribute::Alignment); |
| 1643 | if (ValidPG.hasAttributes()) { |
| 1644 | Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range); |
| 1645 | if (CBRange.isValid()) { |
| 1646 | Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range); |
| 1647 | if (NewRange.isValid()) { |
| 1648 | ValidPG.addRangeAttr( |
| 1649 | CR: CBRange.getRange().intersectWith(CR: NewRange.getRange())); |
| 1650 | } |
| 1651 | } |
| 1652 | // Three checks. |
| 1653 | // If the callsite has `noundef`, then a poison due to violating the |
| 1654 | // return attribute will create UB anyways so we can always propagate. |
| 1655 | // Otherwise, if the return value (callee to be inlined) has `noundef`, we |
| 1656 | // can't propagate as a new poison return will cause UB. |
| 1657 | // Finally, check if the return value has no uses whose behavior may |
| 1658 | // change/may cause UB if we potentially return poison. At the moment this |
| 1659 | // is implemented overly conservatively with a single-use check. |
| 1660 | // TODO: Update the single-use check to iterate through uses and only bail |
| 1661 | // if we have a potentially dangerous use. |
| 1662 | |
| 1663 | if (CB.hasRetAttr(Kind: Attribute::NoUndef) || |
| 1664 | (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef))) |
| 1665 | NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG); |
| 1666 | } |
| 1667 | NewRetVal->setAttributes(NewAL); |
| 1668 | } |
| 1669 | } |
| 1670 | |
| 1671 | /// If the inlined function has non-byval align arguments, then |
| 1672 | /// add @llvm.assume-based alignment assumptions to preserve this information. |
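| | /// |
| | /// Illustrative sketch (hypothetical argument %p with align 16): if the |
| | /// alignment cannot already be proven at the call site, an assumption like |
| | ///   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 16) ] |
| | /// is emitted before the call so the information is not lost by inlining. |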
| 1673 | static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { |
| 1674 | if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) |
| 1675 | return; |
| 1676 | |
| 1677 | AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); |
| 1678 | auto &DL = CB.getDataLayout(); |
| 1679 | |
| 1680 | // To avoid inserting redundant assumptions, we should check for assumptions |
| 1681 | // already in the caller. To do this, we might need a DT of the caller. |
| 1682 | DominatorTree DT; |
| 1683 | bool DTCalculated = false; |
| 1684 | |
| 1685 | Function *CalledFunc = CB.getCalledFunction(); |
| 1686 | for (Argument &Arg : CalledFunc->args()) { |
| 1687 | if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || |
| 1688 | Arg.use_empty()) |
| 1689 | continue; |
| 1690 | MaybeAlign Alignment = Arg.getParamAlign(); |
| 1691 | if (!Alignment) |
| 1692 | continue; |
| 1693 | |
| 1694 | if (!DTCalculated) { |
| 1695 | DT.recalculate(Func&: *CB.getCaller()); |
| 1696 | DTCalculated = true; |
| 1697 | } |
| 1698 | // If we can already prove the asserted alignment in the context of the |
| 1699 | // caller, then don't bother inserting the assumption. |
| 1700 | Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo()); |
| 1701 | if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment) |
| 1702 | continue; |
| 1703 | |
| 1704 | CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( |
| 1705 | DL, PtrValue: ArgVal, Alignment: Alignment->value()); |
| 1706 | AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp)); |
| 1707 | } |
| 1708 | } |
| 1709 | |
| 1710 | static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, |
| 1711 | MaybeAlign SrcAlign, Module *M, |
| 1712 | BasicBlock *InsertBlock, |
| 1713 | InlineFunctionInfo &IFI, |
| 1714 | Function *CalledFunc) { |
| 1715 | IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); |
| 1716 | |
| 1717 | Value *Size = |
| 1718 | Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType)); |
| 1719 | |
| 1720 | Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout()); |
| 1721 | |
| 1722 | // Generate a memcpy with the correct alignments. |
| 1723 | CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size); |
| 1724 | |
| 1725 | // The verifier requires that all calls of debug-info-bearing functions |
| 1726 | // from debug-info-bearing functions have a debug location (for inlining |
| 1727 | // purposes). Assign a dummy location to satisfy the constraint. |
| 1728 | if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram()) |
| 1729 | if (DISubprogram *SP = CalledFunc->getSubprogram()) |
| 1730 | CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP)); |
| 1731 | } |
| 1732 | |
| 1733 | /// When inlining a call site that has a byval argument, |
| 1734 | /// we have to make the implicit memcpy explicit by adding it. |
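| | /// |
| | /// Illustrative sketch (hypothetical type and alignments): for a |
| | /// byval(%struct.S) argument, the callee-local copy becomes an explicit |
| | /// alloca in the caller's entry block plus a memcpy (see |
| | /// HandleByValArgumentInit above), roughly |
| | ///   %byval.tmp = alloca %struct.S, align 8 |
| | ///   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %byval.tmp, ptr align 4 %arg, |
| | ///                                    i64 16, i1 false) |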
| 1735 | static Value *HandleByValArgument(Type *ByValType, Value *Arg, |
| 1736 | Instruction *TheCall, |
| 1737 | const Function *CalledFunc, |
| 1738 | InlineFunctionInfo &IFI, |
| 1739 | MaybeAlign ByValAlignment) { |
| 1740 | Function *Caller = TheCall->getFunction(); |
| 1741 | const DataLayout &DL = Caller->getDataLayout(); |
| 1742 | |
| 1743 | // If the called function is readonly, then it could not mutate the caller's |
| 1744 | // copy of the byval'd memory. In this case, it is safe to elide the copy and |
| 1745 | // temporary. |
| 1746 | if (CalledFunc->onlyReadsMemory()) { |
| 1747 | // If the byval argument has a specified alignment that is greater than that |
| 1748 | // of the passed-in pointer, then we either have to round up the input pointer |
| 1749 | // or give up on this transformation. |
| 1750 | if (ByValAlignment.valueOrOne() == 1) |
| 1751 | return Arg; |
| 1752 | |
| 1753 | AssumptionCache *AC = |
| 1754 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
| 1755 | |
| 1756 | // If the pointer is already known to be sufficiently aligned, or if we can |
| 1757 | // round it up to a larger alignment, then we don't need a temporary. |
| 1758 | if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >= |
| 1759 | *ByValAlignment) |
| 1760 | return Arg; |
| 1761 | |
| 1762 | // Otherwise, we have to make a memcpy to get a safe alignment. This is bad |
| 1763 | // for code quality, but rarely happens and is required for correctness. |
| 1764 | } |
| 1765 | |
| 1766 | // Create the alloca, using the type's preferred alignment from the DataLayout. |
| 1767 | Align Alignment = DL.getPrefTypeAlign(Ty: ByValType); |
| 1768 | |
| 1769 | // If the byval had an alignment specified, we *must* use at least that |
| 1770 | // alignment, as it is required by the byval argument (and uses of the |
| 1771 | // pointer inside the callee). |
| 1772 | if (ByValAlignment) |
| 1773 | Alignment = std::max(a: Alignment, b: *ByValAlignment); |
| 1774 | |
| 1775 | AllocaInst *NewAlloca = |
| 1776 | new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(), |
| 1777 | nullptr, Alignment, Arg->getName()); |
| 1778 | NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated()); |
| 1779 | NewAlloca->insertBefore(InsertPos: Caller->begin()->begin()); |
| 1780 | IFI.StaticAllocas.push_back(Elt: NewAlloca); |
| 1781 | |
| 1782 | // Uses of the argument in the function should use our new alloca |
| 1783 | // instead. |
| 1784 | return NewAlloca; |
| 1785 | } |
| 1786 | |
| 1787 | // Check whether this Value is used by a lifetime intrinsic. |
| 1788 | static bool isUsedByLifetimeMarker(Value *V) { |
| 1789 | for (User *U : V->users()) |
| 1790 | if (isa<LifetimeIntrinsic>(Val: U)) |
| 1791 | return true; |
| 1792 | return false; |
| 1793 | } |
| 1794 | |
| 1795 | // Check whether the given alloca already has |
| 1796 | // lifetime.start or lifetime.end intrinsics. |
| 1797 | static bool hasLifetimeMarkers(AllocaInst *AI) { |
| 1798 | Type *Ty = AI->getType(); |
| 1799 | Type *Int8PtrTy = |
| 1800 | PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace()); |
| 1801 | if (Ty == Int8PtrTy) |
| 1802 | return isUsedByLifetimeMarker(V: AI); |
| 1803 | |
| 1804 | // Do a scan to find all the casts to i8*. |
| 1805 | for (User *U : AI->users()) { |
| 1806 | if (U->getType() != Int8PtrTy) continue; |
| 1807 | if (U->stripPointerCasts() != AI) continue; |
| 1808 | if (isUsedByLifetimeMarker(V: U)) |
| 1809 | return true; |
| 1810 | } |
| 1811 | return false; |
| 1812 | } |
| 1813 | |
| 1814 | /// Return the result of AI->isStaticAlloca() if AI were moved to the entry |
| 1815 | /// block. Allocas used in inalloca calls and allocas of dynamic array size |
| 1816 | /// cannot be static. |
| 1817 | static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) { |
| 1818 | return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca(); |
| 1819 | } |
| 1820 | |
| 1821 | /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL |
| 1822 | /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. |
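| | /// |
| | /// Illustrative sketch (made-up metadata): a callee location such as |
| | ///   !7 = !DILocation(line: 7, column: 3, scope: !calleeSP) |
| | /// becomes, after inlining at a call site whose location is !42, |
| | ///   !8 = !DILocation(line: 7, column: 3, scope: !calleeSP, inlinedAt: !42) |
| | /// where !42 is the (distinct) call-site location. |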
| 1823 | static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, |
| 1824 | LLVMContext &Ctx, |
| 1825 | DenseMap<const MDNode *, MDNode *> &IANodes) { |
| 1826 | auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes); |
| 1827 | return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(), |
| 1828 | Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(), |
| 1829 | AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank()); |
| 1830 | } |
| 1831 | |
| 1832 | /// Update inlined instructions' line numbers to |
| 1833 | /// encode the location where these instructions are inlined. |
| 1834 | static void fixupLineNumbers(Function *Fn, Function::iterator FI, |
| 1835 | Instruction *TheCall, bool CalleeHasDebugInfo) { |
| 1836 | if (!TheCall->getDebugLoc()) |
| 1837 | return; |
| 1838 | |
| 1839 | // Don't propagate the source location atom from the call to inlined nodebug |
| 1840 | // instructions, and avoid putting it in the InlinedAt field of inlined |
| 1841 | // not-nodebug instructions. FIXME: Possibly worth transferring/generating |
| 1842 | // an atom for the returned value, otherwise we miss stepping on inlined |
| 1843 | // nodebug functions (which is different to existing behaviour). |
| 1844 | DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom(); |
| 1845 | |
| 1846 | auto &Ctx = Fn->getContext(); |
| 1847 | DILocation *InlinedAtNode = TheCallDL; |
| 1848 | |
| 1849 | // Create a unique call site, not to be confused with any other call from the |
| 1850 | // same location. |
| 1851 | InlinedAtNode = DILocation::getDistinct( |
| 1852 | Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(), |
| 1853 | Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt()); |
| 1854 | |
| 1855 | // Cache the inlined-at nodes as they're built so they are reused; without |
| 1856 | // this, every instruction's inlined-at chain would become distinct from the |
| 1857 | // others. |
| 1858 | DenseMap<const MDNode *, MDNode *> IANodes; |
| 1859 | |
| 1860 | // Check if we are not generating inline line tables and want to use |
| 1861 | // the call site location instead. |
| 1862 | bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables" ); |
| 1863 | |
| 1864 | // Helper-util for updating the metadata attached to an instruction. |
| 1865 | auto UpdateInst = [&](Instruction &I) { |
| 1866 | // Loop metadata needs to be updated so that the start and end locs |
| 1867 | // reference inlined-at locations. |
| 1868 | auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, |
| 1869 | &IANodes](Metadata *MD) -> Metadata * { |
| 1870 | if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD)) |
| 1871 | return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get(); |
| 1872 | return MD; |
| 1873 | }; |
| 1874 | updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc); |
| 1875 | |
| 1876 | if (!NoInlineLineTables) |
| 1877 | if (DebugLoc DL = I.getDebugLoc()) { |
| 1878 | DebugLoc IDL = |
| 1879 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes); |
| 1880 | I.setDebugLoc(IDL); |
| 1881 | return; |
| 1882 | } |
| 1883 | |
| 1884 | if (CalleeHasDebugInfo && !NoInlineLineTables) |
| 1885 | return; |
| 1886 | |
| 1887 | // If the inlined instruction has no line number, or if inline info |
| 1888 | // is not being generated, make it look as if it originates from the call |
| 1889 | // location. This is important for ((__always_inline, __nodebug__)) |
| 1890 | // functions which must use caller location for all instructions in their |
| 1891 | // function body. |
| 1892 | |
| 1893 | // Don't update static allocas, as they may get moved later. |
| 1894 | if (auto *AI = dyn_cast<AllocaInst>(Val: &I)) |
| 1895 | if (allocaWouldBeStaticInEntry(AI)) |
| 1896 | return; |
| 1897 | |
| 1898 | // Do not force a debug loc for pseudo probes, since they do not need to |
| 1899 | // be debuggable, and also they are expected to have a zero/null dwarf |
| 1900 | // discriminator at this point which could be violated otherwise. |
| 1901 | if (isa<PseudoProbeInst>(Val: I)) |
| 1902 | return; |
| 1903 | |
| 1904 | I.setDebugLoc(TheCallDL); |
| 1905 | }; |
| 1906 | |
| 1907 | // Helper-util for updating debug-info records attached to instructions. |
| 1908 | auto UpdateDVR = [&](DbgRecord *DVR) { |
| 1909 | assert(DVR->getDebugLoc() && "Debug Value must have debug loc" ); |
| 1910 | if (NoInlineLineTables) { |
| 1911 | DVR->setDebugLoc(TheCallDL); |
| 1912 | return; |
| 1913 | } |
| 1914 | DebugLoc DL = DVR->getDebugLoc(); |
| 1915 | DebugLoc IDL = |
| 1916 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, |
| 1917 | Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes); |
| 1918 | DVR->setDebugLoc(IDL); |
| 1919 | }; |
| 1920 | |
| 1921 | // Iterate over all instructions, updating metadata and debug-info records. |
| 1922 | for (; FI != Fn->end(); ++FI) { |
| 1923 | for (Instruction &I : *FI) { |
| 1924 | UpdateInst(I); |
| 1925 | for (DbgRecord &DVR : I.getDbgRecordRange()) { |
| 1926 | UpdateDVR(&DVR); |
| 1927 | } |
| 1928 | } |
| 1929 | |
| 1930 | // Remove debug info records if we're not keeping inline info. |
| 1931 | if (NoInlineLineTables) { |
| 1932 | BasicBlock::iterator BI = FI->begin(); |
| 1933 | while (BI != FI->end()) { |
| 1934 | BI->dropDbgRecords(); |
| 1935 | ++BI; |
| 1936 | } |
| 1937 | } |
| 1938 | } |
| 1939 | } |
| 1940 | |
| 1941 | #undef DEBUG_TYPE |
| 1942 | #define DEBUG_TYPE "assignment-tracking" |
| 1943 | /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. |
| 1944 | static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, |
| 1945 | const CallBase &CB) { |
| 1946 | at::StorageToVarsMap EscapedLocals; |
| 1947 | SmallPtrSet<const Value *, 4> SeenBases; |
| 1948 | |
| 1949 | LLVM_DEBUG( |
| 1950 | errs() << "# Finding caller local variables escaped by callee\n" ); |
| 1951 | for (const Value *Arg : CB.args()) { |
| 1952 | LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n" ); |
| 1953 | if (!Arg->getType()->isPointerTy()) { |
| 1954 | LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n" ); |
| 1955 | continue; |
| 1956 | } |
| 1957 | |
| 1958 | const Instruction *I = dyn_cast<Instruction>(Val: Arg); |
| 1959 | if (!I) { |
| 1960 | LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n" ); |
| 1961 | continue; |
| 1962 | } |
| 1963 | |
| 1964 | // Walk back to the base storage. |
| 1965 | assert(Arg->getType()->isPtrOrPtrVectorTy()); |
| 1966 | APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false); |
| 1967 | const AllocaInst *Base = dyn_cast<AllocaInst>( |
| 1968 | Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true)); |
| 1969 | if (!Base) { |
| 1970 | LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n" ); |
| 1971 | continue; |
| 1972 | } |
| 1973 | |
| 1974 | assert(Base); |
| 1975 | LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n" ); |
| 1976 | // We only need to process each base address once - skip any duplicates. |
| 1977 | if (!SeenBases.insert(Ptr: Base).second) |
| 1978 | continue; |
| 1979 | |
| 1980 | // Find all local variables associated with the backing storage. |
| 1981 | auto CollectAssignsForStorage = [&](auto *DbgAssign) { |
| 1982 | // Skip variables from inlined functions - they are not local variables. |
| 1983 | if (DbgAssign->getDebugLoc().getInlinedAt()) |
| 1984 | return; |
| 1985 | LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n" ); |
| 1986 | EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign)); |
| 1987 | }; |
| 1988 | for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
| 1989 | for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
| 1990 | } |
| 1991 | return EscapedLocals; |
| 1992 | } |
| 1993 | |
| 1994 | static void trackInlinedStores(Function::iterator Start, Function::iterator End, |
| 1995 | const CallBase &CB) { |
| 1996 | LLVM_DEBUG(errs() << "trackInlinedStores into " |
| 1997 | << Start->getParent()->getName() << " from " |
| 1998 | << CB.getCalledFunction()->getName() << "\n" ); |
| 1999 | const DataLayout &DL = CB.getDataLayout(); |
| 2000 | at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL); |
| 2001 | } |
| 2002 | |
| 2003 | /// Update inlined instructions' DIAssignID metadata. We need to do this |
| 2004 | /// otherwise a function inlined more than once into the same function |
| 2005 | /// will cause DIAssignID to be shared by many instructions. |
| 2006 | static void fixupAssignments(Function::iterator Start, Function::iterator End) { |
| 2007 | DenseMap<DIAssignID *, DIAssignID *> Map; |
| 2008 | // Loop over all the inlined instructions. If we find a DIAssignID |
| 2009 | // attachment or use, replace it with a new version. |
| 2010 | for (auto BBI = Start; BBI != End; ++BBI) { |
| 2011 | for (Instruction &I : *BBI) |
| 2012 | at::remapAssignID(Map, I); |
| 2013 | } |
| 2014 | } |
| 2015 | #undef DEBUG_TYPE |
| 2016 | #define DEBUG_TYPE "inline-function" |
| 2017 | |
| 2018 | /// Update the block frequencies of the caller after a callee has been inlined. |
| 2019 | /// |
| 2020 | /// Each block cloned into the caller has its block frequency scaled by the |
| 2021 | /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of |
| 2022 | /// callee's entry block gets the same frequency as the callsite block and the |
| 2023 | /// relative frequencies of all cloned blocks remain the same after cloning. |
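| | /// |
| | /// Worked example (illustrative numbers): if the call site block has frequency |
| | /// 200 and the callee's entry block had frequency 400, every cloned block is |
| | /// scaled by 200/400 = 0.5, so a callee block with frequency 100 gets |
| | /// frequency 50 in the caller. |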
| 2024 | static void updateCallerBFI(BasicBlock *CallSiteBlock, |
| 2025 | const ValueToValueMapTy &VMap, |
| 2026 | BlockFrequencyInfo *CallerBFI, |
| 2027 | BlockFrequencyInfo *CalleeBFI, |
| 2028 | const BasicBlock &CalleeEntryBlock) { |
| 2029 | SmallPtrSet<BasicBlock *, 16> ClonedBBs; |
| 2030 | for (auto Entry : VMap) { |
| 2031 | if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second) |
| 2032 | continue; |
| 2033 | auto *OrigBB = cast<BasicBlock>(Val: Entry.first); |
| 2034 | auto *ClonedBB = cast<BasicBlock>(Val: Entry.second); |
| 2035 | BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB); |
| 2036 | if (!ClonedBBs.insert(Ptr: ClonedBB).second) { |
| 2037 | // Multiple blocks in the callee might get mapped to one cloned block in |
| 2038 | // the caller since we prune the callee as we clone it. When that happens, |
| 2039 | // we want to use the maximum among the original blocks' frequencies. |
| 2040 | BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB); |
| 2041 | if (NewFreq > Freq) |
| 2042 | Freq = NewFreq; |
| 2043 | } |
| 2044 | CallerBFI->setBlockFreq(BB: ClonedBB, Freq); |
| 2045 | } |
| 2046 | BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock)); |
| 2047 | CallerBFI->setBlockFreqAndScale( |
| 2048 | ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs); |
| 2049 | } |
| 2050 | |
| 2051 | /// Update the branch metadata for cloned call instructions. |
| 2052 | static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, |
| 2053 | const ProfileCount &CalleeEntryCount, |
| 2054 | const CallBase &TheCall, ProfileSummaryInfo *PSI, |
| 2055 | BlockFrequencyInfo *CallerBFI) { |
| 2056 | if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) |
| 2057 | return; |
| 2058 | auto CallSiteCount = |
| 2059 | PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt; |
| 2060 | int64_t CallCount = |
| 2061 | std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount()); |
| 2062 | updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap); |
| 2063 | } |
| 2064 | |
| 2065 | void llvm::updateProfileCallee( |
| 2066 | Function *Callee, int64_t EntryDelta, |
| 2067 | const ValueMap<const Value *, WeakTrackingVH> *VMap) { |
| 2068 | auto CalleeCount = Callee->getEntryCount(); |
| 2069 | if (!CalleeCount) |
| 2070 | return; |
| 2071 | |
| 2072 | const uint64_t PriorEntryCount = CalleeCount->getCount(); |
| 2073 | |
| 2074 | // Since CallSiteCount is an estimate, it could exceed the original callee |
| 2075 | // count; in that case, clamp the new entry count to 0 to guard against underflow. |
| 2076 | const uint64_t NewEntryCount = |
| 2077 | (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) |
| 2078 | ? 0 |
| 2079 | : PriorEntryCount + EntryDelta; |
| 2080 | |
| 2081 | auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, |
| 2082 | const uint64_t PriorEntryCount) { |
| 2083 | Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); |
| 2084 | if (VPtr) |
| 2085 | scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount); |
| 2086 | }; |
| 2087 | |
| 2088 | // Are we updating during inlining? |
| 2089 | if (VMap) { |
| 2090 | uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; |
| 2091 | for (auto Entry : *VMap) { |
| 2092 | if (isa<CallInst>(Val: Entry.first)) |
| 2093 | if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) { |
| 2094 | CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
| 2095 | updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); |
| 2096 | } |
| 2097 | |
| 2098 | if (isa<InvokeInst>(Val: Entry.first)) |
| 2099 | if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) { |
| 2100 | II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
| 2101 | updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); |
| 2102 | } |
| 2103 | } |
| 2104 | } |
| 2105 | |
| 2106 | if (EntryDelta) { |
| 2107 | Callee->setEntryCount(Count: NewEntryCount); |
| 2108 | |
| 2109 | for (BasicBlock &BB : *Callee) |
| 2110 | // No need to update the callsite if it is pruned during inlining. |
| 2111 | if (!VMap || VMap->count(Val: &BB)) |
| 2112 | for (Instruction &I : BB) { |
| 2113 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) { |
| 2114 | CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
| 2115 | updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); |
| 2116 | } |
| 2117 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) { |
| 2118 | II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
| 2119 | updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); |
| 2120 | } |
| 2121 | } |
| 2122 | } |
| 2123 | } |
| 2124 | |
| 2125 | /// An operand bundle "clang.arc.attachedcall" on a call indicates the call |
| 2126 | /// result is implicitly consumed by a call to retainRV or claimRV immediately |
| 2127 | /// after the call. This function inlines the retainRV/claimRV calls. |
| 2128 | /// |
| 2129 | /// There are three cases to consider: |
| 2130 | /// |
| 2131 | /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned |
| 2132 | /// object in the callee return block, the autoreleaseRV call and the |
| 2133 | /// retainRV/claimRV call in the caller cancel out. If the call in the caller |
| 2134 | /// is a claimRV call, a call to objc_release is emitted. |
| 2135 | /// |
| 2136 | /// 2. If there is a call in the callee return block that doesn't have operand |
| 2137 | /// bundle "clang.arc.attachedcall", the operand bundle on the original call |
| 2138 | /// is transferred to the call in the callee. |
| 2139 | /// |
| 2140 | /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is |
| 2141 | /// a retainRV call. |
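| | /// |
| | /// Illustrative sketch of case 1 (simplified): if the callee's return block is |
| | ///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v) |
| | ///   ret ptr %v |
| | /// and the call site carries a retainRV bundle, the autoreleaseRV call is |
| | /// simply erased; with a claimRV bundle, a call to @llvm.objc.release(ptr %v) |
| | /// is emitted and the autoreleaseRV call is erased. |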
| 2142 | static void |
| 2143 | inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, |
| 2144 | const SmallVectorImpl<ReturnInst *> &Returns) { |
| 2145 | assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function" ); |
| 2146 | bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, |
| 2147 | IsUnsafeClaimRV = !IsRetainRV; |
| 2148 | |
| 2149 | for (auto *RI : Returns) { |
| 2150 | Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0)); |
| 2151 | bool InsertRetainCall = IsRetainRV; |
| 2152 | IRBuilder<> Builder(RI->getContext()); |
| 2153 | |
| 2154 | // Walk backwards through the basic block looking for either a matching |
| 2155 | // autoreleaseRV call or an unannotated call. |
| 2156 | auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()), |
| 2157 | y: RI->getParent()->rend()); |
| 2158 | for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) { |
| 2159 | // Ignore casts. |
| 2160 | if (isa<CastInst>(Val: I)) |
| 2161 | continue; |
| 2162 | |
| 2163 | if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) { |
| 2164 | if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || |
| 2165 | !II->use_empty() || |
| 2166 | objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd) |
| 2167 | break; |
| 2168 | |
| 2169 | // If we've found a matching autoreleaseRV call: |
| 2170 | // - If claimRV is attached to the call, insert a call to objc_release |
| 2171 | // and erase the autoreleaseRV call. |
| 2172 | // - If retainRV is attached to the call, just erase the autoreleaseRV |
| 2173 | // call. |
| 2174 | if (IsUnsafeClaimRV) { |
| 2175 | Builder.SetInsertPoint(II); |
| 2176 | Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd); |
| 2177 | } |
| 2178 | II->eraseFromParent(); |
| 2179 | InsertRetainCall = false; |
| 2180 | break; |
| 2181 | } |
| 2182 | |
| 2183 | auto *CI = dyn_cast<CallInst>(Val: &I); |
| 2184 | |
| 2185 | if (!CI) |
| 2186 | break; |
| 2187 | |
| 2188 | if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd || |
| 2189 | objcarc::hasAttachedCallOpBundle(CB: CI)) |
| 2190 | break; |
| 2191 | |
| 2192 | // If we've found an unannotated call that defines RetOpnd, add a |
| 2193 | // "clang.arc.attachedcall" operand bundle. |
| 2194 | Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)}; |
| 2195 | OperandBundleDef OB("clang.arc.attachedcall" , BundleArgs); |
| 2196 | auto *NewCall = CallBase::addOperandBundle( |
| 2197 | CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator()); |
| 2198 | NewCall->copyMetadata(SrcInst: *CI); |
| 2199 | CI->replaceAllUsesWith(V: NewCall); |
| 2200 | CI->eraseFromParent(); |
| 2201 | InsertRetainCall = false; |
| 2202 | break; |
| 2203 | } |
| 2204 | |
| 2205 | if (InsertRetainCall) { |
| 2206 | // The retainRV is attached to the call and we've failed to find a |
| 2207 | // matching autoreleaseRV or an annotated call in the callee. Emit a call |
| 2208 | // to objc_retain. |
| 2209 | Builder.SetInsertPoint(RI); |
| 2210 | Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd); |
| 2211 | } |
| 2212 | } |
| 2213 | } |
| 2214 | |
| 2215 | // In contextual profiling, when an inline succeeds, we want to remap the |
| 2216 | // indices of the callee into the index space of the caller. We can't just leave |
| 2217 | // them as-is because the same callee may appear in other places in this caller |
| 2218 | // (other callsites), and its (callee's) counters and sub-contextual profile |
| 2219 | // tree would be potentially different. |
| 2220 | // Not all BBs of the callee may survive the opportunistic DCE InlineFunction |
| 2221 | // does (same goes for callsites in the callee). |
| 2222 | // We will return a pair of vectors, one for basic block IDs and one for |
| 2223 | // callsites. For such a vector V, V[Idx] will be -1 if the callee |
| 2224 | // instrumentation with index Idx did not survive inlining, and a new value |
| 2225 | // otherwise. |
| 2226 | // This function will update the caller's instrumentation intrinsics |
| 2227 | // accordingly, mapping indices as described above. We also replace the "name" |
| 2228 | // operand because we use it to distinguish between "own" instrumentation and |
| 2229 | // "from callee" instrumentation when performing the traversal of the CFG of the |
| 2230 | // caller. We traverse depth-first from the callsite's BB and up to the point we |
| 2231 | // hit BBs owned by the caller. |
| 2232 | // The return values will then be used to update the contextual |
| 2233 | // profile. Note: we only update the "name" and "index" operands in the |
| 2234 | // instrumentation intrinsics; we leave the hash and total number of indices |
| 2235 | // as-is, since it's not worth updating those. |
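| | // |
| | // Illustrative sketch (made-up indices): if the callee had counters 0..3 and |
| | // only counters 1 and 3 survive inlining, the returned counter map could be |
| | // {-1, 7, -1, 8}: callee counter 1 now uses caller index 7 and callee counter |
| | // 3 uses caller index 8, while counters 0 and 2 were dropped. |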
| 2236 | static std::pair<std::vector<int64_t>, std::vector<int64_t>> |
| 2237 | remapIndices(Function &Caller, BasicBlock *StartBB, |
| 2238 | PGOContextualProfile &CtxProf, uint32_t CalleeCounters, |
| 2239 | uint32_t CalleeCallsites) { |
| 2240 | // We'll allocate new IDs to the imported counters and callsites. We're |
| 2241 | // using -1 to indicate a counter we delete. Most likely the entry ID, for |
| 2242 | // example, will be deleted - we don't want 2 IDs in the same BB, and the |
| 2243 | // entry would have been cloned into the callsite's old BB. |
| 2244 | std::vector<int64_t> CalleeCounterMap; |
| 2245 | std::vector<int64_t> CalleeCallsiteMap; |
| 2246 | CalleeCounterMap.resize(new_size: CalleeCounters, x: -1); |
| 2247 | CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1); |
| 2248 | |
| 2249 | auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { |
| 2250 | if (Ins.getNameValue() == &Caller) |
| 2251 | return false; |
| 2252 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
| 2253 | if (CalleeCounterMap[OldID] == -1) |
| 2254 | CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller); |
| 2255 | const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]); |
| 2256 | |
| 2257 | Ins.setNameValue(&Caller); |
| 2258 | Ins.setIndex(NewID); |
| 2259 | return true; |
| 2260 | }; |
| 2261 | |
| 2262 | auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool { |
| 2263 | if (Ins.getNameValue() == &Caller) |
| 2264 | return false; |
| 2265 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
| 2266 | if (CalleeCallsiteMap[OldID] == -1) |
| 2267 | CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller); |
| 2268 | const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]); |
| 2269 | |
| 2270 | Ins.setNameValue(&Caller); |
| 2271 | Ins.setIndex(NewID); |
| 2272 | return true; |
| 2273 | }; |
| 2274 | |
| 2275 | std::deque<BasicBlock *> Worklist; |
| 2276 | DenseSet<const BasicBlock *> Seen; |
| 2277 | // We will traverse the BBs starting from the callsite BB. The callsite BB |
| 2278 | // will have at least a BB ID - maybe its own, and in any case the one coming |
| 2279 | // from the cloned function's entry BB. The other BBs we'll start seeing from |
| 2280 | // there on may or may not have BB IDs. BBs with IDs belonging to our caller |
| 2281 | // are definitely not coming from the imported function and form a boundary |
| 2282 | // past which we don't need to traverse anymore. BBs may have no |
| 2283 | // instrumentation (because we originally inserted instrumentation as per |
| 2284 | // MST), in which case we'll traverse past them. An invariant we'll keep is |
| 2285 | // that a BB will have at most 1 BB ID. For example, in the callsite BB, we |
| 2286 | // will delete the callee BB's instrumentation. This doesn't result in |
| 2287 | // information loss: the entry BB of the callee will have the same count as |
| 2288 | // the callsite's BB. At the end of this traversal, all the callee's |
| 2289 | // instrumentation would be mapped into the caller's instrumentation index |
| 2290 | // space. Some of the callee's counters may be deleted (as mentioned, this |
| 2291 | // should result in no loss of information). |
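| | // Hypothetical walk, for illustration: StartBB (carries the cloned callee
| | // entry ID, which gets remapped) -> B1 (no ID, traversed through) -> B2
| | // (instrumentation already owned by the caller and nothing rewritten: a
| | // boundary, so its successors are not enqueued).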
| 2292 | Worklist.push_back(x: StartBB); |
| 2293 | while (!Worklist.empty()) { |
| 2294 | auto *BB = Worklist.front(); |
| 2295 | Worklist.pop_front(); |
| 2296 | bool Changed = false; |
| 2297 | auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB); |
| 2298 | if (BBID) { |
| 2299 | Changed |= RewriteInstrIfNeeded(*BBID); |
| 2300 | // This may be the entry block of the inlined callee, coming into a BB
| 2301 | // that didn't have instrumentation because of MST decisions. Make sure
| 2302 | // it's placed accordingly. This is a no-op elsewhere.
| 2303 | BBID->moveBefore(InsertPos: BB->getFirstInsertionPt()); |
| 2304 | } |
| 2305 | for (auto &I : llvm::make_early_inc_range(Range&: *BB)) { |
| 2306 | if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) { |
| 2307 | if (isa<InstrProfIncrementInstStep>(Val: Inc)) { |
| 2308 | // Step instrumentation is used for select instructions. Inlining may
| 2309 | // have propagated a constant that resolves the condition of the select;
| 2310 | // in that case, function cloning resolves the value of the select and
| 2311 | // elides the select instruction. If that is the case, the step parameter
| 2312 | // of the instrumentation will reflect it.
| 2313 | // We can delete the instrumentation in that case.
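| | // Hypothetical example: an @llvm.instrprof.increment.step whose step operand
| | // became a constant after cloning has no surviving select next to it, so it
| | // is erased below; otherwise the intrinsic is kept and remapped.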
| 2314 | if (isa<Constant>(Val: Inc->getStep())) { |
| 2315 | assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode())); |
| 2316 | Inc->eraseFromParent(); |
| 2317 | } else { |
| 2318 | assert(isa_and_nonnull<SelectInst>(Inc->getNextNode())); |
| 2319 | RewriteInstrIfNeeded(*Inc); |
| 2320 | } |
| 2321 | } else if (Inc != BBID) { |
| 2322 | // If we're here it means that the BB had more than one ID, presumably
| 2323 | // some coming from the callee. We "made up our mind" to keep the |
| 2324 | // first one (which may or may not have been originally the caller's). |
| 2325 | // All the others are superfluous and we delete them. |
| 2326 | Inc->eraseFromParent(); |
| 2327 | Changed = true; |
| 2328 | } |
| 2329 | } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) { |
| 2330 | Changed |= RewriteCallsiteInsIfNeeded(*CS); |
| 2331 | } |
| 2332 | } |
| 2333 | if (!BBID || Changed) |
| 2334 | for (auto *Succ : successors(BB)) |
| 2335 | if (Seen.insert(V: Succ).second) |
| 2336 | Worklist.push_back(x: Succ); |
| 2337 | } |
| 2338 | |
| 2339 | assert(!llvm::is_contained(CalleeCounterMap, 0) && |
| 2340 | "Counter index mapping should be either to -1 or to non-zero index, " |
| 2341 | "because the 0 " |
| 2342 | "index corresponds to the entry BB of the caller" ); |
| 2343 | assert(!llvm::is_contained(CalleeCallsiteMap, 0) && |
| 2344 | "Callsite index mapping should be either to -1 or to non-zero index, " |
| 2345 | "because there should have been at least a callsite - the inlined one " |
| 2346 | "- which would have had a 0 index." ); |
| 2347 | |
| 2348 | return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; |
| 2349 | } |
| 2350 | |
| 2351 | // Inline. If successful, update the contextual profile (if a valid one is |
| 2352 | // given). |
| 2353 | // The contextual profile data is organized in trees, as follows: |
| 2354 | // - each node corresponds to a function |
| 2355 | // - the root of each tree corresponds to an "entrypoint" - e.g. |
| 2356 | // RPC handler for server side |
| 2357 | // - the path from the root to a node is a particular call path |
| 2358 | // - the counters stored in a node are counter values observed in that |
| 2359 | // particular call path ("context") |
| 2360 | // - the edges between nodes are annotated with callsite IDs. |
| 2361 | // |
| 2362 | // Updating the contextual profile after an inlining means, at a high level, |
| 2363 | // copying over the data of the callee, **intentionally without any value |
| 2364 | // scaling**, and copying over the callees of the inlined callee. |
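| | // A small, entirely hypothetical example of such a tree:
| | //
| | //   rpc_handler                 counters: [1000, 250]
| | //   `- callsite 0 -> foo        counters: [250, 40]
| | //        `- callsite 1 -> bar   counters: [40]
| | //
| | // Inlining bar into foo then, in every context rooted at rpc_handler, appends
| | // bar's (remapped) counters to foo's and re-parents bar's sub-contexts under
| | // foo's newly allocated callsite IDs, with no scaling of the values.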
| 2365 | llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, |
| 2366 | PGOContextualProfile &CtxProf, |
| 2367 | bool MergeAttributes, |
| 2368 | AAResults *CalleeAAR, |
| 2369 | bool InsertLifetime, |
| 2370 | Function *ForwardVarArgsTo) { |
| 2371 | if (!CtxProf.isInSpecializedModule()) |
| 2372 | return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
| 2373 | ForwardVarArgsTo); |
| 2374 | |
| 2375 | auto &Caller = *CB.getCaller(); |
| 2376 | auto &Callee = *CB.getCalledFunction(); |
| 2377 | auto *StartBB = CB.getParent(); |
| 2378 | |
| 2379 | // Get some preliminary data about the callsite before it might get inlined. |
| 2380 | // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to |
| 2381 | // get this data upfront and rely less on InlineFunction's behavior. |
| 2382 | const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee); |
| 2383 | auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); |
| 2384 | const auto CallsiteID = |
| 2385 | static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue()); |
| 2386 | |
| 2387 | const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee); |
| 2388 | const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee); |
| 2389 | |
| 2390 | auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
| 2391 | ForwardVarArgsTo); |
| 2392 | if (!Ret.isSuccess()) |
| 2393 | return Ret; |
| 2394 | |
| 2395 | // Inlining succeeded, we don't need the instrumentation of the inlined |
| 2396 | // callsite. |
| 2397 | CallsiteIDIns->eraseFromParent(); |
| 2398 | |
| 2399 | // Assigning the maps to a local and then capturing a reference to it in the
| 2400 | // lambda, because captured structured bindings are a C++20 extension. We do
| 2401 | // also need a capture here, though.
| 2402 | const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf, |
| 2403 | CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites); |
| 2404 | const uint32_t NewCountersSize = CtxProf.getNumCounters(F: Caller);
| 2405 | |
| 2406 | auto Updater = [&](PGOCtxProfContext &Ctx) { |
| 2407 | assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); |
| 2408 | const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps; |
| 2409 | assert( |
| 2410 | (Ctx.counters().size() + |
| 2411 | llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == |
| 2412 | NewCountersSize) && |
| 2413 | "The caller's counters size should have grown by the number of new " |
| 2414 | "distinct counters inherited from the inlined callee." ); |
| 2415 | Ctx.resizeCounters(Size: NewCountersSize); |
| 2416 | // If the callsite wasn't exercised in this context, the value of the |
| 2417 | // counters coming from it is 0 - which it is right now, after resizing them |
| 2418 | // - and so we're done. |
| 2419 | auto CSIt = Ctx.callsites().find(x: CallsiteID); |
| 2420 | if (CSIt == Ctx.callsites().end()) |
| 2421 | return; |
| 2422 | auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID); |
| 2423 | // The callsite was exercised, but not with this callee (so presumably this |
| 2424 | // is an indirect callsite). Again, we're done here. |
| 2425 | if (CalleeCtxIt == CSIt->second.end()) |
| 2426 | return; |
| 2427 | |
| 2428 | // Let's pull in the counter values and the subcontexts coming from the |
| 2429 | // inlined callee. |
| 2430 | auto &CalleeCtx = CalleeCtxIt->second; |
| 2431 | assert(CalleeCtx.guid() == CalleeGUID); |
| 2432 | |
| 2433 | for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { |
| 2434 | const int64_t NewIndex = CalleeCounterMap[I]; |
| 2435 | if (NewIndex >= 0) { |
| 2436 | assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 " |
| 2437 | "index, that's the caller's entry BB" ); |
| 2438 | Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; |
| 2439 | } |
| 2440 | } |
| 2441 | for (auto &[I, OtherSet] : CalleeCtx.callsites()) { |
| 2442 | const int64_t NewCSIdx = CalleeCallsiteMap[I]; |
| 2443 | if (NewCSIdx >= 0) { |
| 2444 | assert(NewCSIdx != 0 && |
| 2445 | "callsite index mapping shouldn't happen to a 0 index, the " |
| 2446 | "caller must've had at least one callsite (with such an index)" ); |
| 2447 | Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet)); |
| 2448 | } |
| 2449 | } |
| 2450 | // We know the traversal is preorder, so it wouldn't have yet looked at the |
| 2451 | // sub-contexts of this context that it's currently visiting. Meaning, the |
| 2452 | // erase below invalidates no iterators. |
| 2453 | auto Deleted = Ctx.callsites().erase(x: CallsiteID); |
| 2454 | assert(Deleted); |
| 2455 | (void)Deleted; |
| 2456 | }; |
| 2457 | CtxProf.update(Updater, F: Caller); |
| 2458 | return Ret; |
| 2459 | } |
| 2460 | |
| 2461 | /// This function inlines the called function into the basic block of the |
| 2462 | /// caller. This returns false if it is not possible to inline this call. |
| 2463 | /// The program is still in a well defined state if this occurs though. |
| 2464 | /// |
| 2465 | /// Note that this only does one level of inlining. For example, if the |
| 2466 | /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now |
| 2467 | /// exists in the instruction stream. Similarly this will inline a recursive |
| 2468 | /// function by one level. |
| 2469 | llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
| 2470 | bool MergeAttributes, |
| 2471 | AAResults *CalleeAAR, |
| 2472 | bool InsertLifetime, |
| 2473 | Function *ForwardVarArgsTo, |
| 2474 | OptimizationRemarkEmitter *ORE) { |
| 2475 | assert(CB.getParent() && CB.getFunction() && "Instruction not in function!" ); |
| 2476 | |
| 2477 | // FIXME: we don't inline callbr yet. |
| 2478 | if (isa<CallBrInst>(Val: CB)) |
| 2479 | return InlineResult::failure(Reason: "We don't inline callbr yet." ); |
| 2480 | |
| 2481 | // If IFI has any state in it, zap it before we fill it in. |
| 2482 | IFI.reset(); |
| 2483 | |
| 2484 | Function *CalledFunc = CB.getCalledFunction(); |
| 2485 | if (!CalledFunc || // Can't inline external function or indirect |
| 2486 | CalledFunc->isDeclaration()) // call! |
| 2487 | return InlineResult::failure(Reason: "external or indirect" ); |
| 2488 | |
| 2489 | // The inliner does not know how to inline through calls with operand bundles |
| 2490 | // in general ... |
| 2491 | Value *ConvergenceControlToken = nullptr; |
| 2492 | if (CB.hasOperandBundles()) { |
| 2493 | for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { |
| 2494 | auto OBUse = CB.getOperandBundleAt(Index: i); |
| 2495 | uint32_t Tag = OBUse.getTagID(); |
| 2496 | // ... but it knows how to inline through "deopt" operand bundles ... |
| 2497 | if (Tag == LLVMContext::OB_deopt) |
| 2498 | continue; |
| 2499 | // ... and "funclet" operand bundles. |
| 2500 | if (Tag == LLVMContext::OB_funclet) |
| 2501 | continue; |
| 2502 | if (Tag == LLVMContext::OB_clang_arc_attachedcall) |
| 2503 | continue; |
| 2504 | if (Tag == LLVMContext::OB_kcfi) |
| 2505 | continue; |
| 2506 | if (Tag == LLVMContext::OB_convergencectrl) { |
| 2507 | ConvergenceControlToken = OBUse.Inputs[0].get(); |
| 2508 | continue; |
| 2509 | } |
| 2510 | |
| 2511 | return InlineResult::failure(Reason: "unsupported operand bundle" ); |
| 2512 | } |
| 2513 | } |
| 2514 | |
| 2515 | // FIXME: The check below is redundant and incomplete. According to spec, if a |
| 2516 | // convergent call is missing a token, then the caller is using uncontrolled |
| 2517 | // convergence. If the callee has an entry intrinsic, then the callee is using |
| 2518 | // controlled convergence, and the call cannot be inlined. A proper |
| 2519 | // implementation of this check requires a whole new analysis that identifies
| 2520 | // convergence in every function. For now, we skip that and just do this one |
| 2521 | // cursory check. The underlying assumption is that in a compiler flow that |
| 2522 | // fully implements convergence control tokens, there is no mixing of |
| 2523 | // controlled and uncontrolled convergent operations in the whole program. |
| 2524 | if (CB.isConvergent()) { |
| 2525 | if (!ConvergenceControlToken && |
| 2526 | getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) { |
| 2527 | return InlineResult::failure( |
| 2528 | Reason: "convergent call needs convergencectrl operand" ); |
| 2529 | } |
| 2530 | } |
| 2531 | |
| 2532 | // If the call to the callee cannot throw, set the 'nounwind' flag on any |
| 2533 | // calls that we inline. |
| 2534 | bool MarkNoUnwind = CB.doesNotThrow(); |
| 2535 | |
| 2536 | BasicBlock *OrigBB = CB.getParent(); |
| 2537 | Function *Caller = OrigBB->getParent(); |
| 2538 | |
| 2539 | // GC poses two hazards to inlining, which only occur when the callee has GC: |
| 2540 | // 1. If the caller has no GC, then the callee's GC must be propagated to the |
| 2541 | // caller. |
| 2542 | // 2. If the caller has a differing GC, it is invalid to inline. |
| 2543 | if (CalledFunc->hasGC()) { |
| 2544 | if (!Caller->hasGC()) |
| 2545 | Caller->setGC(CalledFunc->getGC()); |
| 2546 | else if (CalledFunc->getGC() != Caller->getGC()) |
| 2547 | return InlineResult::failure(Reason: "incompatible GC" ); |
| 2548 | } |
| 2549 | |
| 2550 | // Get the personality function from the callee if it contains a landing pad. |
| 2551 | Constant *CalledPersonality = |
| 2552 | CalledFunc->hasPersonalityFn() |
| 2553 | ? CalledFunc->getPersonalityFn()->stripPointerCasts() |
| 2554 | : nullptr; |
| 2555 | |
| 2556 | // Find the personality function used by the landing pads of the caller. If it |
| 2557 | // exists, then check to see that it matches the personality function used in |
| 2558 | // the callee. |
| 2559 | Constant *CallerPersonality = |
| 2560 | Caller->hasPersonalityFn() |
| 2561 | ? Caller->getPersonalityFn()->stripPointerCasts() |
| 2562 | : nullptr; |
| 2563 | if (CalledPersonality) { |
| 2564 | if (!CallerPersonality) |
| 2565 | Caller->setPersonalityFn(CalledPersonality); |
| 2566 | // If the personality functions match, then we can perform the |
| 2567 | // inlining. Otherwise, we can't inline. |
| 2568 | // TODO: This isn't 100% true. Some personality functions are proper |
| 2569 | // supersets of others and can be used in place of the other. |
| 2570 | else if (CalledPersonality != CallerPersonality) |
| 2571 | return InlineResult::failure(Reason: "incompatible personality" ); |
| 2572 | } |
| 2573 | |
| 2574 | // We need to figure out which funclet the callsite was in so that we may |
| 2575 | // properly nest the callee. |
| 2576 | Instruction *CallSiteEHPad = nullptr; |
| 2577 | if (CallerPersonality) { |
| 2578 | EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality); |
| 2579 | if (isScopedEHPersonality(Pers: Personality)) { |
| 2580 | std::optional<OperandBundleUse> ParentFunclet = |
| 2581 | CB.getOperandBundle(ID: LLVMContext::OB_funclet); |
| 2582 | if (ParentFunclet) |
| 2583 | CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front()); |
| 2584 | |
| 2585 | // OK, the inlining site is legal. What about the target function? |
| 2586 | |
| 2587 | if (CallSiteEHPad) { |
| 2588 | if (Personality == EHPersonality::MSVC_CXX) { |
| 2589 | // The MSVC personality cannot tolerate catches getting inlined into |
| 2590 | // cleanup funclets. |
| 2591 | if (isa<CleanupPadInst>(Val: CallSiteEHPad)) { |
| 2592 | // Ok, the call site is within a cleanuppad. Let's check the callee |
| 2593 | // for catchpads. |
| 2594 | for (const BasicBlock &CalledBB : *CalledFunc) { |
| 2595 | if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt())) |
| 2596 | return InlineResult::failure(Reason: "catch in cleanup funclet" ); |
| 2597 | } |
| 2598 | } |
| 2599 | } else if (isAsynchronousEHPersonality(Pers: Personality)) { |
| 2600 | // SEH is even less tolerant: there may not be any sort of exceptional
| 2601 | // funclet in the callee.
| 2602 | for (const BasicBlock &CalledBB : *CalledFunc) { |
| 2603 | if (CalledBB.isEHPad()) |
| 2604 | return InlineResult::failure(Reason: "SEH in cleanup funclet" ); |
| 2605 | } |
| 2606 | } |
| 2607 | } |
| 2608 | } |
| 2609 | } |
| 2610 | |
| 2611 | // Determine if we are dealing with a call in an EHPad which does not unwind |
| 2612 | // to caller. |
| 2613 | bool EHPadForCallUnwindsLocally = false; |
| 2614 | if (CallSiteEHPad && isa<CallInst>(Val: CB)) { |
| 2615 | UnwindDestMemoTy FuncletUnwindMap; |
| 2616 | Value *CallSiteUnwindDestToken = |
| 2617 | getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap); |
| 2618 | |
| 2619 | EHPadForCallUnwindsLocally = |
| 2620 | CallSiteUnwindDestToken && |
| 2621 | !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken); |
| 2622 | } |
| 2623 | |
| 2624 | // Get an iterator to the last basic block in the function, which will have |
| 2625 | // the new function inlined after it. |
| 2626 | Function::iterator LastBlock = --Caller->end(); |
| 2627 | |
| 2628 | // Make sure to capture all of the return instructions from the cloned |
| 2629 | // function. |
| 2630 | SmallVector<ReturnInst*, 8> Returns; |
| 2631 | ClonedCodeInfo InlinedFunctionInfo; |
| 2632 | Function::iterator FirstNewBlock; |
| 2633 | |
| 2634 | { // Scope to destroy VMap after cloning. |
| 2635 | ValueToValueMapTy VMap; |
| 2636 | struct ByValInit { |
| 2637 | Value *Dst; |
| 2638 | Value *Src; |
| 2639 | MaybeAlign SrcAlign; |
| 2640 | Type *Ty; |
| 2641 | }; |
| 2642 | // Keep a list of tuples (dst, src, src_align, byval_type) to emit byval
| 2643 | // initializations. The source alignment is only available through the call
| 2644 | // base, therefore it has to be saved here.
| 2645 | SmallVector<ByValInit, 4> ByValInits; |
| 2646 | |
| 2647 | // When inlining a function that contains noalias scope metadata, |
| 2648 | // this metadata needs to be cloned so that the inlined blocks |
| 2649 | // have different "unique scopes" at every call site. |
| 2650 | // Track the metadata that must be cloned. Do this before other changes to |
| 2651 | // the function, so that we do not get in trouble when inlining caller == |
| 2652 | // callee. |
| 2653 | ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); |
| 2654 | |
| 2655 | auto &DL = Caller->getDataLayout(); |
| 2656 | |
| 2657 | // Calculate the vector of arguments to pass into the function cloner, which |
| 2658 | // matches up the formal to the actual argument values. |
| 2659 | auto AI = CB.arg_begin(); |
| 2660 | unsigned ArgNo = 0; |
| 2661 | for (Function::arg_iterator I = CalledFunc->arg_begin(), |
| 2662 | E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { |
| 2663 | Value *ActualArg = *AI; |
| 2664 | |
| 2665 | // When byval arguments are actually inlined, we need to make the copy
| 2666 | // implied by them explicit. However, we don't do this if the callee is
| 2667 | // readonly or readnone, because the copy would be unneeded: the callee
| 2668 | // doesn't modify the struct.
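| | // Conceptually (the names here are only illustrative): a byval actual
| | // argument %arg becomes a fresh "%arg.copy = alloca" in the caller, the
| | // callee's formal is mapped to %arg.copy in VMap, and HandleByValArgumentInit
| | // later emits the copy from %arg into %arg.copy.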
| 2669 | if (CB.isByValArgument(ArgNo)) { |
| 2670 | ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg, |
| 2671 | TheCall: &CB, CalledFunc, IFI, |
| 2672 | ByValAlignment: CalledFunc->getParamAlign(ArgNo)); |
| 2673 | if (ActualArg != *AI) |
| 2674 | ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI, |
| 2675 | .SrcAlign: CB.getParamAlign(ArgNo), |
| 2676 | .Ty: CB.getParamByValType(ArgNo)}); |
| 2677 | } |
| 2678 | |
| 2679 | VMap[&*I] = ActualArg; |
| 2680 | } |
| 2681 | |
| 2682 | // TODO: Remove this when users have been updated to the assume bundles. |
| 2683 | // Add alignment assumptions if necessary. We do this before the inlined |
| 2684 | // instructions are actually cloned into the caller so that we can easily |
| 2685 | // check what will be known at the start of the inlined code. |
| 2686 | AddAlignmentAssumptions(CB, IFI); |
| 2687 | |
| 2688 | AssumptionCache *AC = |
| 2689 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
| 2690 | |
| 2691 | // Preserve all attributes of the call and its parameters.
| 2692 | salvageKnowledge(I: &CB, AC); |
| 2693 | |
| 2694 | // We want the inliner to prune the code as it copies. We would LOVE to |
| 2695 | // have no dead or constant instructions leftover after inlining occurs |
| 2696 | // (which can happen, e.g., because an argument was constant), but we'll be |
| 2697 | // happy with whatever the cloner can do. |
| 2698 | CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap, |
| 2699 | /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i" , |
| 2700 | CodeInfo: &InlinedFunctionInfo); |
| 2701 | // Remember the first block that is newly cloned over. |
| 2702 | FirstNewBlock = LastBlock; ++FirstNewBlock; |
| 2703 | |
| 2704 | // Insert retainRV/claimRV runtime calls.
| 2705 | objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB); |
| 2706 | if (RVCallKind != objcarc::ARCInstKind::None) |
| 2707 | inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns); |
| 2708 | |
| 2709 | // Update caller/callee profiles only when requested. For sample loader
| 2710 | // inlining, the context-sensitive inlinee profile doesn't need to be |
| 2711 | // subtracted from callee profile, and the inlined clone also doesn't need |
| 2712 | // to be scaled based on call site count. |
| 2713 | if (IFI.UpdateProfile) { |
| 2714 | if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) |
| 2715 | // Update the BFI of blocks cloned into the caller. |
| 2716 | updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI, |
| 2717 | CalleeEntryBlock: CalledFunc->front()); |
| 2718 | |
| 2719 | if (auto Profile = CalledFunc->getEntryCount()) |
| 2720 | updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI, |
| 2721 | CallerBFI: IFI.CallerBFI); |
| 2722 | } |
| 2723 | |
| 2724 | // Inject byval arguments initialization. |
| 2725 | for (ByValInit &Init : ByValInits) |
| 2726 | HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign, |
| 2727 | M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI, |
| 2728 | CalledFunc); |
| 2729 | |
| 2730 | std::optional<OperandBundleUse> ParentDeopt = |
| 2731 | CB.getOperandBundle(ID: LLVMContext::OB_deopt); |
| 2732 | if (ParentDeopt) { |
| 2733 | SmallVector<OperandBundleDef, 2> OpDefs; |
| 2734 | |
| 2735 | for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { |
| 2736 | CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH); |
| 2737 | if (!ICS) |
| 2738 | continue; // instruction was DCE'd or RAUW'ed to undef |
| 2739 | |
| 2740 | OpDefs.clear(); |
| 2741 | |
| 2742 | OpDefs.reserve(N: ICS->getNumOperandBundles()); |
| 2743 | |
| 2744 | for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; |
| 2745 | ++COBi) { |
| 2746 | auto ChildOB = ICS->getOperandBundleAt(Index: COBi); |
| 2747 | if (ChildOB.getTagID() != LLVMContext::OB_deopt) { |
| 2748 | // If the inlined call has other operand bundles, let them be |
| 2749 | OpDefs.emplace_back(Args&: ChildOB); |
| 2750 | continue; |
| 2751 | } |
| 2752 | |
| 2753 | // It may be useful to separate this logic (of handling operand |
| 2754 | // bundles) out to a separate "policy" component if this gets crowded. |
| 2755 | // Prepend the parent's deoptimization continuation to the newly |
| 2756 | // inlined call's deoptimization continuation. |
| 2757 | std::vector<Value *> MergedDeoptArgs; |
| 2758 | MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() + |
| 2759 | ChildOB.Inputs.size()); |
| 2760 | |
| 2761 | llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs); |
| 2762 | llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs); |
| 2763 | |
| 2764 | OpDefs.emplace_back(Args: "deopt" , Args: std::move(MergedDeoptArgs)); |
| 2765 | } |
| 2766 | |
| 2767 | Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator()); |
| 2768 | |
| 2769 | // Note: the RAUW does the appropriate fixup in VMap, so we need to do |
| 2770 | // this even if the call returns void. |
| 2771 | ICS->replaceAllUsesWith(V: NewI); |
| 2772 | |
| 2773 | VH = nullptr; |
| 2774 | ICS->eraseFromParent(); |
| 2775 | } |
| 2776 | } |
| 2777 | |
| 2778 | // For 'nodebug' functions, the associated DISubprogram is always null. |
| 2779 | // Conservatively avoid propagating the callsite debug location to |
| 2780 | // instructions inlined from a function whose DISubprogram is not null. |
| 2781 | fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB, |
| 2782 | CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr); |
| 2783 | |
| 2784 | if (isAssignmentTrackingEnabled(M: *Caller->getParent())) { |
| 2785 | // Interpret inlined stores to caller-local variables as assignments. |
| 2786 | trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB); |
| 2787 | |
| 2788 | // Update DIAssignID metadata attachments and uses so that they are |
| 2789 | // unique to this inlined instance. |
| 2790 | fixupAssignments(Start: FirstNewBlock, End: Caller->end()); |
| 2791 | } |
| 2792 | |
| 2793 | // Now clone the inlined noalias scope metadata. |
| 2794 | SAMetadataCloner.clone(); |
| 2795 | SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end()); |
| 2796 | |
| 2797 | // Add noalias metadata if necessary. |
| 2798 | AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); |
| 2799 | |
| 2800 | // Clone return attributes on the callsite into the calls within the inlined |
| 2801 | // function which feed into its return value. |
| 2802 | AddReturnAttributes(CB, VMap, InlinedFunctionInfo); |
| 2803 | |
| 2804 | // Clone attributes on the params of the callsite to calls within the |
| 2805 | // inlined function which use the same param. |
| 2806 | AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo); |
| 2807 | |
| 2808 | propagateMemProfMetadata( |
| 2809 | Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE); |
| 2810 | |
| 2811 | // Propagate metadata on the callsite if necessary. |
| 2812 | PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end()); |
| 2813 | |
| 2814 | // Register any cloned assumptions. |
| 2815 | if (IFI.GetAssumptionCache) |
| 2816 | for (BasicBlock &NewBlock : |
| 2817 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
| 2818 | for (Instruction &I : NewBlock) |
| 2819 | if (auto *II = dyn_cast<AssumeInst>(Val: &I)) |
| 2820 | IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II); |
| 2821 | } |
| 2822 | |
| 2823 | if (ConvergenceControlToken) { |
| 2824 | IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock); |
| 2825 | if (IntrinsicCall) { |
| 2826 | IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken); |
| 2827 | IntrinsicCall->eraseFromParent(); |
| 2828 | } |
| 2829 | } |
| 2830 | |
| 2831 | // If there are any alloca instructions in the block that used to be the entry |
| 2832 | // block for the callee, move them to the entry block of the caller. First |
| 2833 | // calculate which instruction they should be inserted before. We insert the |
| 2834 | // instructions at the end of the current alloca list. |
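| | // Sketch of the intent (hypothetical IR): a "%buf = alloca [64 x i8]" cloned
| | // from the callee's entry block gets spliced into the caller's entry block,
| | // so it is still treated as a static alloca rather than a dynamic one.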
| 2835 | { |
| 2836 | BasicBlock::iterator InsertPoint = Caller->begin()->begin(); |
| 2837 | for (BasicBlock::iterator I = FirstNewBlock->begin(), |
| 2838 | E = FirstNewBlock->end(); I != E; ) { |
| 2839 | AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++); |
| 2840 | if (!AI) continue; |
| 2841 | |
| 2842 | // If the alloca is now dead, remove it. This often occurs due to code |
| 2843 | // specialization. |
| 2844 | if (AI->use_empty()) { |
| 2845 | AI->eraseFromParent(); |
| 2846 | continue; |
| 2847 | } |
| 2848 | |
| 2849 | if (!allocaWouldBeStaticInEntry(AI)) |
| 2850 | continue; |
| 2851 | |
| 2852 | // Keep track of the static allocas that we inline into the caller. |
| 2853 | IFI.StaticAllocas.push_back(Elt: AI); |
| 2854 | |
| 2855 | // Scan for the block of allocas that we can move over, and move them |
| 2856 | // all at once. |
| 2857 | while (isa<AllocaInst>(Val: I) && |
| 2858 | !cast<AllocaInst>(Val&: I)->use_empty() && |
| 2859 | allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) { |
| 2860 | IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I)); |
| 2861 | ++I; |
| 2862 | } |
| 2863 | |
| 2864 | // Transfer all of the allocas over in a block. Using splice means |
| 2865 | // that the instructions aren't removed from the symbol table, then |
| 2866 | // reinserted. |
| 2867 | I.setTailBit(true); |
| 2868 | Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock, |
| 2869 | FromBeginIt: AI->getIterator(), FromEndIt: I); |
| 2870 | } |
| 2871 | } |
| 2872 | |
| 2873 | SmallVector<Value*,4> VarArgsToForward; |
| 2874 | SmallVector<AttributeSet, 4> VarArgsAttrs; |
| 2875 | for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); |
| 2876 | i < CB.arg_size(); i++) { |
| 2877 | VarArgsToForward.push_back(Elt: CB.getArgOperand(i)); |
| 2878 | VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i)); |
| 2879 | } |
| 2880 | |
| 2881 | bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; |
| 2882 | if (InlinedFunctionInfo.ContainsCalls) { |
| 2883 | CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; |
| 2884 | if (CallInst *CI = dyn_cast<CallInst>(Val: &CB)) |
| 2885 | CallSiteTailKind = CI->getTailCallKind(); |
| 2886 | |
| 2887 | // For inlining purposes, the "notail" marker is the same as no marker. |
| 2888 | if (CallSiteTailKind == CallInst::TCK_NoTail) |
| 2889 | CallSiteTailKind = CallInst::TCK_None; |
| 2890 | |
| 2891 | for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; |
| 2892 | ++BB) { |
| 2893 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
| 2894 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
| 2895 | if (!CI) |
| 2896 | continue; |
| 2897 | |
| 2898 | // Forward varargs from inlined call site to calls to the |
| 2899 | // ForwardVarArgsTo function, if requested, and to musttail calls. |
| 2900 | if (!VarArgsToForward.empty() && |
| 2901 | ((ForwardVarArgsTo && |
| 2902 | CI->getCalledFunction() == ForwardVarArgsTo) || |
| 2903 | CI->isMustTailCall())) { |
| 2904 | // Collect attributes for non-vararg parameters. |
| 2905 | AttributeList Attrs = CI->getAttributes(); |
| 2906 | SmallVector<AttributeSet, 8> ArgAttrs; |
| 2907 | if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { |
| 2908 | for (unsigned ArgNo = 0; |
| 2909 | ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) |
| 2910 | ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo)); |
| 2911 | } |
| 2912 | |
| 2913 | // Add VarArg attributes. |
| 2914 | ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end()); |
| 2915 | Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(), |
| 2916 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs); |
| 2917 | // Add VarArgs to existing parameters. |
| 2918 | SmallVector<Value *, 6> Params(CI->args()); |
| 2919 | Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end()); |
| 2920 | CallInst *NewCI = CallInst::Create( |
| 2921 | Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "" , InsertBefore: CI->getIterator()); |
| 2922 | NewCI->setDebugLoc(CI->getDebugLoc()); |
| 2923 | NewCI->setAttributes(Attrs); |
| 2924 | NewCI->setCallingConv(CI->getCallingConv()); |
| 2925 | CI->replaceAllUsesWith(V: NewCI); |
| 2926 | CI->eraseFromParent(); |
| 2927 | CI = NewCI; |
| 2928 | } |
| 2929 | |
| 2930 | if (Function *F = CI->getCalledFunction()) |
| 2931 | InlinedDeoptimizeCalls |= |
| 2932 | F->getIntrinsicID() == Intrinsic::experimental_deoptimize; |
| 2933 | |
| 2934 | // We need to reduce the strength of any inlined tail calls. For |
| 2935 | // musttail, we have to avoid introducing potential unbounded stack |
| 2936 | // growth. For example, if functions 'f' and 'g' are mutually recursive |
| 2937 | // with musttail, we can inline 'g' into 'f' so long as we preserve |
| 2938 | // musttail on the cloned call to 'f'. If either the inlined call site |
| 2939 | // or the cloned call site is *not* musttail, the program already has |
| 2940 | // one frame of stack growth, so it's safe to remove musttail. Here is |
| 2941 | // a table of example transformations: |
| 2942 | // |
| 2943 | // f -> musttail g -> musttail f ==> f -> musttail f |
| 2944 | // f -> musttail g -> tail f ==> f -> tail f |
| 2945 | // f -> g -> musttail f ==> f -> f |
| 2946 | // f -> g -> tail f ==> f -> f |
| 2947 | // |
| 2948 | // Inlined notail calls should remain notail calls. |
| 2949 | CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); |
| 2950 | if (ChildTCK != CallInst::TCK_NoTail) |
| 2951 | ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK); |
| 2952 | CI->setTailCallKind(ChildTCK); |
| 2953 | InlinedMustTailCalls |= CI->isMustTailCall(); |
| 2954 | |
| 2955 | // Call sites inlined through a 'nounwind' call site should be |
| 2956 | // 'nounwind' as well. However, avoid marking call sites explicitly |
| 2957 | // where possible. This helps expose more opportunities for CSE after |
| 2958 | // inlining, commonly when the callee is an intrinsic. |
| 2959 | if (MarkNoUnwind && !CI->doesNotThrow()) |
| 2960 | CI->setDoesNotThrow(); |
| 2961 | } |
| 2962 | } |
| 2963 | } |
| 2964 | |
| 2965 | // Leave lifetime markers for the static allocas, scoping them to the
| 2966 | // function we just inlined. |
| 2967 | // We need to insert lifetime intrinsics even at O0 to avoid invalid |
| 2968 | // access caused by multithreaded coroutines. The check |
| 2969 | // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. |
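| | // Roughly (hypothetical IR; the exact intrinsic signature may vary by LLVM
| | // version): for each such alloca %a we emit an @llvm.lifetime.start at the
| | // top of the inlined body and a matching @llvm.lifetime.end before each
| | // return, except on paths that end in musttail or deoptimize calls.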
| 2970 | if ((InsertLifetime || Caller->isPresplitCoroutine()) && |
| 2971 | !IFI.StaticAllocas.empty()) { |
| 2972 | IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin()); |
| 2973 | for (AllocaInst *AI : IFI.StaticAllocas) { |
| 2974 | // Don't mark swifterror allocas. They can't have bitcast uses. |
| 2975 | if (AI->isSwiftError()) |
| 2976 | continue; |
| 2977 | |
| 2978 | // If the alloca is already scoped to something smaller than the whole |
| 2979 | // function then there's no need to add redundant, less accurate markers. |
| 2980 | if (hasLifetimeMarkers(AI)) |
| 2981 | continue; |
| 2982 | |
| 2983 | // Try to determine the size of the allocation. |
| 2984 | ConstantInt *AllocaSize = nullptr; |
| 2985 | if (ConstantInt *AIArraySize = |
| 2986 | dyn_cast<ConstantInt>(Val: AI->getArraySize())) { |
| 2987 | auto &DL = Caller->getDataLayout(); |
| 2988 | Type *AllocaType = AI->getAllocatedType(); |
| 2989 | TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType); |
| 2990 | uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); |
| 2991 | |
| 2992 | // Don't add markers for zero-sized allocas. |
| 2993 | if (AllocaArraySize == 0) |
| 2994 | continue; |
| 2995 | |
| 2996 | // Check that array size doesn't saturate uint64_t and doesn't |
| 2997 | // overflow when it's multiplied by type size. |
| 2998 | if (!AllocaTypeSize.isScalable() && |
| 2999 | AllocaArraySize != std::numeric_limits<uint64_t>::max() && |
| 3000 | std::numeric_limits<uint64_t>::max() / AllocaArraySize >= |
| 3001 | AllocaTypeSize.getFixedValue()) { |
| 3002 | AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()), |
| 3003 | V: AllocaArraySize * AllocaTypeSize); |
| 3004 | } |
| 3005 | } |
| 3006 | |
| 3007 | builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize); |
| 3008 | for (ReturnInst *RI : Returns) { |
| 3009 | // Don't insert llvm.lifetime.end calls between a musttail or deoptimize |
| 3010 | // call and a return. The return kills all local allocas. |
| 3011 | if (InlinedMustTailCalls && |
| 3012 | RI->getParent()->getTerminatingMustTailCall()) |
| 3013 | continue; |
| 3014 | if (InlinedDeoptimizeCalls && |
| 3015 | RI->getParent()->getTerminatingDeoptimizeCall()) |
| 3016 | continue; |
| 3017 | IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize); |
| 3018 | } |
| 3019 | } |
| 3020 | } |
| 3021 | |
| 3022 | // If the inlined code contained dynamic alloca instructions, wrap the inlined |
| 3023 | // code with llvm.stacksave/llvm.stackrestore intrinsics. |
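| | // Rough shape of the rewrite (hypothetical IR, intrinsic names abbreviated):
| | //   %savedstack = call ptr @llvm.stacksave()
| | //   ... inlined body containing the dynamic allocas ...
| | //   call void @llvm.stackrestore(ptr %savedstack) ; before each return,
| | //                                                 ; skipping musttail/deopt paths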
| 3024 | if (InlinedFunctionInfo.ContainsDynamicAllocas) { |
| 3025 | // Insert the llvm.stacksave. |
| 3026 | CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) |
| 3027 | .CreateStackSave(Name: "savedstack" ); |
| 3028 | |
| 3029 | // Insert a call to llvm.stackrestore before any return instructions in the |
| 3030 | // inlined function. |
| 3031 | for (ReturnInst *RI : Returns) { |
| 3032 | // Don't insert llvm.stackrestore calls between a musttail or deoptimize |
| 3033 | // call and a return. The return will restore the stack pointer. |
| 3034 | if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) |
| 3035 | continue; |
| 3036 | if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) |
| 3037 | continue; |
| 3038 | IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr); |
| 3039 | } |
| 3040 | } |
| 3041 | |
| 3042 | // If we are inlining for an invoke instruction, we must make sure to rewrite |
| 3043 | // any call instructions into invoke instructions. This is sensitive to which |
| 3044 | // funclet pads were top-level in the inlinee, so must be done before |
| 3045 | // rewriting the "parent pad" links. |
| 3046 | if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
| 3047 | BasicBlock *UnwindDest = II->getUnwindDest(); |
| 3048 | BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt(); |
| 3049 | if (isa<LandingPadInst>(Val: FirstNonPHI)) { |
| 3050 | HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
| 3051 | } else { |
| 3052 | HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
| 3053 | } |
| 3054 | } |
| 3055 | |
| 3056 | // Update the lexical scopes of the new funclets and callsites. |
| 3057 | // Anything that had 'none' as its parent is now nested inside the callsite's |
| 3058 | // EHPad. |
| 3059 | if (CallSiteEHPad) { |
| 3060 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
| 3061 | E = Caller->end(); |
| 3062 | BB != E; ++BB) { |
| 3063 | // Add bundle operands to inlined call sites. |
| 3064 | PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad); |
| 3065 | |
| 3066 | // It is problematic if the inlinee has a cleanupret which unwinds to |
| 3067 | // caller and we inline it into a call site which doesn't unwind but into |
| 3068 | // an EH pad that does. Such an edge must be dynamically unreachable. |
| 3069 | // As such, we replace the cleanupret with unreachable. |
| 3070 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) |
| 3071 | if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) |
| 3072 | changeToUnreachable(I: CleanupRet); |
| 3073 | |
| 3074 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
| 3075 | if (!I->isEHPad()) |
| 3076 | continue; |
| 3077 | |
| 3078 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
| 3079 | if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad())) |
| 3080 | CatchSwitch->setParentPad(CallSiteEHPad); |
| 3081 | } else { |
| 3082 | auto *FPI = cast<FuncletPadInst>(Val&: I); |
| 3083 | if (isa<ConstantTokenNone>(Val: FPI->getParentPad())) |
| 3084 | FPI->setParentPad(CallSiteEHPad); |
| 3085 | } |
| 3086 | } |
| 3087 | } |
| 3088 | |
| 3089 | if (InlinedDeoptimizeCalls) { |
| 3090 | // We need to at least remove the deoptimizing returns from the Return set, |
| 3091 | // so that the control flow from those returns does not get merged into the |
| 3092 | // caller (but terminate it instead). If the caller's return type does not |
| 3093 | // match the callee's return type, we also need to change the return type of |
| 3094 | // the intrinsic. |
| 3095 | if (Caller->getReturnType() == CB.getType()) { |
| 3096 | llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) { |
| 3097 | return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; |
| 3098 | }); |
| 3099 | } else { |
| 3100 | SmallVector<ReturnInst *, 8> NormalReturns; |
| 3101 | Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration( |
| 3102 | M: Caller->getParent(), id: Intrinsic::experimental_deoptimize, |
| 3103 | Tys: {Caller->getReturnType()}); |
| 3104 | |
| 3105 | for (ReturnInst *RI : Returns) { |
| 3106 | CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); |
| 3107 | if (!DeoptCall) { |
| 3108 | NormalReturns.push_back(Elt: RI); |
| 3109 | continue; |
| 3110 | } |
| 3111 | |
| 3112 | // The calling convention on the deoptimize call itself may be bogus, |
| 3113 | // since the code we're inlining may have undefined behavior (and may |
| 3114 | // never actually execute at runtime); but all |
| 3115 | // @llvm.experimental.deoptimize declarations have to have the same |
| 3116 | // calling convention in a well-formed module. |
| 3117 | auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); |
| 3118 | NewDeoptIntrinsic->setCallingConv(CallingConv); |
| 3119 | auto *CurBB = RI->getParent(); |
| 3120 | RI->eraseFromParent(); |
| 3121 | |
| 3122 | SmallVector<Value *, 4> CallArgs(DeoptCall->args()); |
| 3123 | |
| 3124 | SmallVector<OperandBundleDef, 1> OpBundles; |
| 3125 | DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles); |
| 3126 | auto DeoptAttributes = DeoptCall->getAttributes(); |
| 3127 | DeoptCall->eraseFromParent(); |
| 3128 | assert(!OpBundles.empty() && |
| 3129 | "Expected at least the deopt operand bundle" ); |
| 3130 | |
| 3131 | IRBuilder<> Builder(CurBB); |
| 3132 | CallInst *NewDeoptCall = |
| 3133 | Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles); |
| 3134 | NewDeoptCall->setCallingConv(CallingConv); |
| 3135 | NewDeoptCall->setAttributes(DeoptAttributes); |
| 3136 | if (NewDeoptCall->getType()->isVoidTy()) |
| 3137 | Builder.CreateRetVoid(); |
| 3138 | else |
| 3139 | Builder.CreateRet(V: NewDeoptCall); |
| 3140 | // Since the ret type is changed, remove the incompatible attributes. |
| 3141 | NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible( |
| 3142 | Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes())); |
| 3143 | } |
| 3144 | |
| 3145 | // Leave behind the normal returns so we can merge control flow. |
| 3146 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
| 3147 | } |
| 3148 | } |
| 3149 | |
| 3150 | // Handle any inlined musttail call sites. In order for a new call site to be |
| 3151 | // musttail, the source of the clone and the inlined call site must have been |
| 3152 | // musttail. Therefore it's safe to return without merging control into the |
| 3153 | // phi below. |
| 3154 | if (InlinedMustTailCalls) { |
| 3155 | // Check if we need to bitcast the result of any musttail calls. |
| 3156 | Type *NewRetTy = Caller->getReturnType(); |
| 3157 | bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; |
| 3158 | |
| 3159 | // Handle the returns preceded by musttail calls separately. |
| 3160 | SmallVector<ReturnInst *, 8> NormalReturns; |
| 3161 | for (ReturnInst *RI : Returns) { |
| 3162 | CallInst *ReturnedMustTail = |
| 3163 | RI->getParent()->getTerminatingMustTailCall(); |
| 3164 | if (!ReturnedMustTail) { |
| 3165 | NormalReturns.push_back(Elt: RI); |
| 3166 | continue; |
| 3167 | } |
| 3168 | if (!NeedBitCast) |
| 3169 | continue; |
| 3170 | |
| 3171 | // Delete the old return and any preceding bitcast. |
| 3172 | BasicBlock *CurBB = RI->getParent(); |
| 3173 | auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue()); |
| 3174 | RI->eraseFromParent(); |
| 3175 | if (OldCast) |
| 3176 | OldCast->eraseFromParent(); |
| 3177 | |
| 3178 | // Insert a new bitcast and return with the right type. |
| 3179 | IRBuilder<> Builder(CurBB); |
| 3180 | Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy)); |
| 3181 | } |
| 3182 | |
| 3183 | // Leave behind the normal returns so we can merge control flow. |
| 3184 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
| 3185 | } |
| 3186 | |
| 3187 | // Now that all of the transforms on the inlined code have taken place but |
| 3188 | // before we splice the inlined code into the CFG and lose track of which |
| 3189 | // blocks were actually inlined, collect the call sites. We only do this if |
| 3190 | // call graph updates weren't requested, as those provide value handle based |
| 3191 | // tracking of inlined call sites instead. Calls to intrinsics are not |
| 3192 | // collected because they are not inlineable. |
| 3193 | if (InlinedFunctionInfo.ContainsCalls) { |
| 3194 | // Otherwise just collect the raw call sites that were inlined. |
| 3195 | for (BasicBlock &NewBB : |
| 3196 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
| 3197 | for (Instruction &I : NewBB) |
| 3198 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) |
| 3199 | if (!(CB->getCalledFunction() && |
| 3200 | CB->getCalledFunction()->isIntrinsic())) |
| 3201 | IFI.InlinedCallSites.push_back(Elt: CB); |
| 3202 | } |
| 3203 | |
| 3204 | // If we cloned in _exactly one_ basic block, and if that block ends in a |
| 3205 | // return instruction, we splice the body of the inlined callee directly into |
| 3206 | // the calling basic block. |
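| | // E.g. (purely illustrative): inlining a single-block callee at "%r = call
| | // i32 @f()" pastes the callee's instructions directly before the call in
| | // OrigBB, rewires uses of %r to the cloned return value, and erases both the
| | // call and the cloned return, without creating any new blocks or PHI nodes.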
| 3207 | if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) { |
| 3208 | // Move all of the instructions right before the call. |
| 3209 | OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(), |
| 3210 | FromEndIt: FirstNewBlock->end()); |
| 3211 | // Remove the cloned basic block. |
| 3212 | Caller->back().eraseFromParent(); |
| 3213 | |
| 3214 | // If the call site was an invoke instruction, add a branch to the normal |
| 3215 | // destination. |
| 3216 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
| 3217 | BranchInst *NewBr = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
| 3218 | NewBr->setDebugLoc(Returns[0]->getDebugLoc()); |
| 3219 | } |
| 3220 | |
| 3221 | // If the return instruction returned a value, replace uses of the call with |
| 3222 | // uses of the returned value. |
| 3223 | if (!CB.use_empty()) { |
| 3224 | ReturnInst *R = Returns[0]; |
| 3225 | if (&CB == R->getReturnValue()) |
| 3226 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
| 3227 | else |
| 3228 | CB.replaceAllUsesWith(V: R->getReturnValue()); |
| 3229 | } |
| 3230 | // Since we are now done with the Call/Invoke, we can delete it. |
| 3231 | CB.eraseFromParent(); |
| 3232 | |
| 3233 | // Since we are now done with the return instruction, delete it also. |
| 3234 | Returns[0]->eraseFromParent(); |
| 3235 | |
| 3236 | if (MergeAttributes) |
| 3237 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
| 3238 | |
| 3239 | // We are now done with the inlining. |
| 3240 | return InlineResult::success(); |
| 3241 | } |
| 3242 | |
| 3243 | // Otherwise, we have the normal case, of more than one block to inline or |
| 3244 | // multiple return sites. |
| 3245 | |
| 3246 | // We want to clone the entire callee function into the hole between the |
| 3247 | // "starter" and "ender" blocks. How we accomplish this depends on whether |
| 3248 | // this is an invoke instruction or a call instruction. |
| 3249 | BasicBlock *AfterCallBB; |
| 3250 | BranchInst *CreatedBranchToNormalDest = nullptr; |
| 3251 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
| 3252 | |
| 3253 | // Add an unconditional branch to make this look like the CallInst case... |
| 3254 | CreatedBranchToNormalDest = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
| 3255 | // We intend to replace this DebugLoc with another later. |
| 3256 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary()); |
| 3257 | |
| 3258 | // Split the basic block. This guarantees that no PHI nodes will have to be |
| 3259 | // updated due to new incoming edges, and make the invoke case more |
| 3260 | // symmetric to the call case. |
| 3261 | AfterCallBB = |
| 3262 | OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(), |
| 3263 | BBName: CalledFunc->getName() + ".exit" ); |
| 3264 | |
| 3265 | } else { // It's a call |
| 3266 | // If this is a call instruction, we need to split the basic block that |
| 3267 | // the call lives in. |
| 3268 | // |
| 3269 | AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(), |
| 3270 | BBName: CalledFunc->getName() + ".exit" ); |
| 3271 | } |
| 3272 | |
| 3273 | if (IFI.CallerBFI) { |
| 3274 | // Copy original BB's block frequency to AfterCallBB |
| 3275 | IFI.CallerBFI->setBlockFreq(BB: AfterCallBB, |
| 3276 | Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB)); |
| 3277 | } |
| 3278 | |
| 3279 | // Change the branch that used to go to AfterCallBB to branch to the first |
| 3280 | // basic block of the inlined function. |
| 3281 | // |
| 3282 | Instruction *Br = OrigBB->getTerminator(); |
| 3283 | assert(Br && Br->getOpcode() == Instruction::Br && |
| 3284 | "splitBasicBlock broken!" ); |
| 3285 | Br->setOperand(i: 0, Val: &*FirstNewBlock); |
| 3286 | |
| 3287 | // Now that the function is correct, make it a little bit nicer. In |
| 3288 | // particular, move the basic blocks inserted from the end of the function |
| 3289 | // into the space made by splitting the source basic block. |
| 3290 | Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock, |
| 3291 | FromEndIt: Caller->end()); |
| 3292 | |
| 3293 | // Handle all of the return instructions that we just cloned in, and eliminate |
| 3294 | // any users of the original call/invoke instruction. |
| 3295 | Type *RTy = CalledFunc->getReturnType(); |
| 3296 | |
| 3297 | PHINode *PHI = nullptr; |
| 3298 | if (Returns.size() > 1) { |
| 3299 | // The PHI node should go at the front of the new basic block to merge all |
| 3300 | // possible incoming values. |
| 3301 | if (!CB.use_empty()) { |
| 3302 | PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName()); |
| 3303 | PHI->insertBefore(InsertPos: AfterCallBB->begin()); |
| 3304 | // Anything that used the result of the function call should now use the |
| 3305 | // PHI node as their operand. |
| 3306 | CB.replaceAllUsesWith(V: PHI); |
| 3307 | } |
| 3308 | |
| 3309 | // Loop over all of the return instructions adding entries to the PHI node |
| 3310 | // as appropriate. |
| 3311 | if (PHI) { |
| 3312 | for (ReturnInst *RI : Returns) { |
| 3313 | assert(RI->getReturnValue()->getType() == PHI->getType() && |
| 3314 | "Ret value not consistent in function!" ); |
| 3315 | PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent()); |
| 3316 | } |
| 3317 | } |
| 3318 | |
| 3319 | // Add a branch to the merge points and remove return instructions. |
| 3320 | DebugLoc Loc; |
| 3321 | for (ReturnInst *RI : Returns) { |
| 3322 | BranchInst *BI = BranchInst::Create(IfTrue: AfterCallBB, InsertBefore: RI->getIterator()); |
| 3323 | Loc = RI->getDebugLoc(); |
| 3324 | BI->setDebugLoc(Loc); |
| 3325 | RI->eraseFromParent(); |
| 3326 | } |
| 3327 | // We need to set the debug location to *somewhere* inside the |
| 3328 | // inlined function. The line number may be nonsensical, but the |
| 3329 | // instruction will at least be associated with the right |
| 3330 | // function. |
| 3331 | if (CreatedBranchToNormalDest) |
| 3332 | CreatedBranchToNormalDest->setDebugLoc(Loc); |
| 3333 | } else if (!Returns.empty()) { |
| 3334 | // Otherwise, if there is exactly one return value, just replace anything |
| 3335 | // using the return value of the call with the computed value. |
| 3336 | if (!CB.use_empty()) { |
| 3337 | if (&CB == Returns[0]->getReturnValue()) |
| 3338 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
| 3339 | else |
| 3340 | CB.replaceAllUsesWith(V: Returns[0]->getReturnValue()); |
| 3341 | } |
| 3342 | |
| 3343 | // Update PHI nodes that use the ReturnBB to use the AfterCallBB. |
| 3344 | BasicBlock *ReturnBB = Returns[0]->getParent(); |
| 3345 | ReturnBB->replaceAllUsesWith(V: AfterCallBB); |
| 3346 | |
| 3347 | // Splice the code from the return block into the block that it will return |
| 3348 | // to, which contains the code that was after the call. |
| 3349 | AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB); |
| 3350 | |
| 3351 | if (CreatedBranchToNormalDest) |
| 3352 | CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); |
| 3353 | |
| 3354 | // Delete the return instruction now and empty ReturnBB now. |
| 3355 | Returns[0]->eraseFromParent(); |
| 3356 | ReturnBB->eraseFromParent(); |
| 3357 | } else if (!CB.use_empty()) { |
| 3358 | // In this case there are no returns to use, so there is no clear source |
| 3359 | // location for the "return". |
| 3360 | // FIXME: It may be correct to use the scope end line of the function here, |
| 3361 | // since this likely means we are falling out of the function. |
| 3362 | if (CreatedBranchToNormalDest) |
| 3363 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown()); |
| 3364 | // No returns, but something is using the return value of the call. Just |
| 3365 | // nuke the result. |
| 3366 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
| 3367 | } |
| 3368 | |
| 3369 | // Since we are now done with the Call/Invoke, we can delete it. |
| 3370 | CB.eraseFromParent(); |
| 3371 | |
| 3372 | // If we inlined any musttail calls and the original return is now |
| 3373 | // unreachable, delete it. It can only contain a bitcast and ret. |
| 3374 | if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB)) |
| 3375 | AfterCallBB->eraseFromParent(); |
| 3376 | |
| 3377 | // We should always be able to fold the entry block of the function into the |
| 3378 | // single predecessor of the block... |
| 3379 | assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!" ); |
| 3380 | BasicBlock *CalleeEntry = cast<BranchInst>(Val: Br)->getSuccessor(i: 0); |
| 3381 | |
| 3382 | // Splice the code entry block into calling block, right before the |
| 3383 | // unconditional branch. |
| 3384 | CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes |
| 3385 | OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry); |
| 3386 | |
| 3387 | // Remove the unconditional branch. |
| 3388 | Br->eraseFromParent(); |
| 3389 | |
| 3390 | // Now we can remove the CalleeEntry block, which is now empty. |
| 3391 | CalleeEntry->eraseFromParent(); |
| 3392 | |
| 3393 | // If we inserted a phi node, check to see if it has a single value (e.g. all |
| 3394 | // the entries are the same or undef). If so, remove the PHI so it doesn't |
| 3395 | // block other optimizations. |
| 3396 | if (PHI) { |
| 3397 | AssumptionCache *AC = |
| 3398 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
| 3399 | auto &DL = Caller->getDataLayout(); |
| 3400 | if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) { |
| 3401 | PHI->replaceAllUsesWith(V); |
| 3402 | PHI->eraseFromParent(); |
| 3403 | } |
| 3404 | } |
| 3405 | |
| 3406 | if (MergeAttributes) |
| 3407 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
| 3408 | |
| 3409 | return InlineResult::success(); |
| 3410 | } |
| 3411 | |