1//===- InlineFunction.cpp - Code to perform function inlining -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inlining of a function into a call site, resolving
10// parameters and the return value as appropriate.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SetVector.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/CallGraph.h"
25#include "llvm/Analysis/CaptureTracking.h"
26#include "llvm/Analysis/IndirectCallVisitor.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/MemoryProfileInfo.h"
29#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
30#include "llvm/Analysis/ObjCARCUtil.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/Analysis/ValueTracking.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/IR/Argument.h"
35#include "llvm/IR/AttributeMask.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/ConstantRange.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
43#include "llvm/IR/DebugInfoMetadata.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/DerivedTypes.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/EHPersonalities.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/IRBuilder.h"
50#include "llvm/IR/InlineAsm.h"
51#include "llvm/IR/InstrTypes.h"
52#include "llvm/IR/Instruction.h"
53#include "llvm/IR/Instructions.h"
54#include "llvm/IR/IntrinsicInst.h"
55#include "llvm/IR/Intrinsics.h"
56#include "llvm/IR/LLVMContext.h"
57#include "llvm/IR/MDBuilder.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/ProfDataUtils.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/Support/Casting.h"
65#include "llvm/Support/CommandLine.h"
66#include "llvm/Support/ErrorHandling.h"
67#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
68#include "llvm/Transforms/Utils/Cloning.h"
69#include "llvm/Transforms/Utils/Local.h"
70#include "llvm/Transforms/Utils/ValueMapper.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <limits>
76#include <optional>
77#include <string>
78#include <utility>
79#include <vector>
80
81#define DEBUG_TYPE "inline-function"
82
83using namespace llvm;
84using namespace llvm::memprof;
85using ProfileCount = Function::ProfileCount;
86
87static cl::opt<bool>
88EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(Val: true),
89 cl::Hidden,
90 cl::desc("Convert noalias attributes to metadata during inlining."));
91
92static cl::opt<bool>
93 UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
94 cl::init(Val: true),
95 cl::desc("Use the llvm.experimental.noalias.scope.decl "
96 "intrinsic during inlining."));
97
98// Disabled by default, because the added alignment assumptions may increase
99// compile-time and block optimizations. This option is not suitable for use
100// with frontends that emit comprehensive parameter alignment annotations.
101static cl::opt<bool>
102PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
103 cl::init(Val: false), cl::Hidden,
104 cl::desc("Convert align attributes to assumptions during inlining."));
105
106static cl::opt<unsigned> InlinerAttributeWindow(
107 "max-inst-checked-for-throw-during-inlining", cl::Hidden,
108 cl::desc("the maximum number of instructions analyzed for may throw during "
109 "attribute inference in inlined body"),
110 cl::init(Val: 4));
111
112namespace {
113
114 /// A class for recording information about inlining a landing pad.
115 class LandingPadInliningInfo {
116 /// Destination of the invoke's unwind.
117 BasicBlock *OuterResumeDest;
118
119 /// Destination for the callee's resume.
120 BasicBlock *InnerResumeDest = nullptr;
121
122 /// LandingPadInst associated with the invoke.
123 LandingPadInst *CallerLPad = nullptr;
124
125 /// PHI for EH values from landingpad insts.
126 PHINode *InnerEHValuesPHI = nullptr;
127
128 SmallVector<Value*, 8> UnwindDestPHIValues;
129
130 public:
131 LandingPadInliningInfo(InvokeInst *II)
132 : OuterResumeDest(II->getUnwindDest()) {
133 // If there are PHI nodes in the unwind destination block, we need to keep
134 // track of which values came into them from the invoke before removing
135 // the edge from this block.
136 BasicBlock *InvokeBB = II->getParent();
137 BasicBlock::iterator I = OuterResumeDest->begin();
138 for (; isa<PHINode>(Val: I); ++I) {
139 // Save the value to use for this edge.
140 PHINode *PHI = cast<PHINode>(Val&: I);
141 UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB));
142 }
143
144 CallerLPad = cast<LandingPadInst>(Val&: I);
145 }
146
147 /// The outer unwind destination is the target of
148 /// unwind edges introduced for calls within the inlined function.
149 BasicBlock *getOuterResumeDest() const {
150 return OuterResumeDest;
151 }
152
153 BasicBlock *getInnerResumeDest();
154
155 LandingPadInst *getLandingPadInst() const { return CallerLPad; }
156
157 /// Forward the 'resume' instruction to the caller's landing pad block.
158 /// When the landing pad block has only one predecessor, this is
159 /// a simple branch. When there is more than one predecessor, we need to
160 /// split the landing pad block after the landingpad instruction and jump
161 /// to there.
162 void forwardResume(ResumeInst *RI,
163 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
164
165 /// Add incoming-PHI values to the unwind destination block for the given
166 /// basic block, using the values for the original invoke's source block.
167 void addIncomingPHIValuesFor(BasicBlock *BB) const {
168 addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest);
169 }
170
171 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
172 BasicBlock::iterator I = dest->begin();
173 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
174 PHINode *phi = cast<PHINode>(Val&: I);
175 phi->addIncoming(V: UnwindDestPHIValues[i], BB: src);
176 }
177 }
178 };
179
180} // end anonymous namespace
181
182/// Get or create a target for the branch from ResumeInsts.
183BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
184 if (InnerResumeDest) return InnerResumeDest;
185
186 // Split the landing pad.
187 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
188 InnerResumeDest =
189 OuterResumeDest->splitBasicBlock(I: SplitPoint,
190 BBName: OuterResumeDest->getName() + ".body");
191
192 // The number of incoming edges we expect to the inner landing pad.
193 const unsigned PHICapacity = 2;
194
195 // Create corresponding new PHIs for all the PHIs in the outer landing pad.
196 BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
197 BasicBlock::iterator I = OuterResumeDest->begin();
198 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
199 PHINode *OuterPHI = cast<PHINode>(Val&: I);
200 PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity,
201 NameStr: OuterPHI->getName() + ".lpad-body");
202 InnerPHI->insertBefore(InsertPos: InsertPoint);
203 OuterPHI->replaceAllUsesWith(V: InnerPHI);
204 InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest);
205 }
206
207 // Create a PHI for the exception values.
208 InnerEHValuesPHI =
209 PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body");
210 InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint);
211 CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI);
212 InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest);
213
214 // All done.
215 return InnerResumeDest;
216}
217
218/// Forward the 'resume' instruction to the caller's landing pad block.
219/// When the landing pad block has only one predecessor, this is a simple
220/// branch. When there is more than one predecessor, we need to split the
221/// landing pad block after the landingpad instruction and jump to there.
222void LandingPadInliningInfo::forwardResume(
223 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
224 BasicBlock *Dest = getInnerResumeDest();
225 BasicBlock *Src = RI->getParent();
226
227 BranchInst::Create(IfTrue: Dest, InsertBefore: Src);
228
229 // Update the PHIs in the destination. They were inserted in an order which
230 // makes this work.
231 addIncomingPHIValuesForInto(src: Src, dest: Dest);
232
233 InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src);
234 RI->eraseFromParent();
235}
236
237/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
238static Value *getParentPad(Value *EHPad) {
239 if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad))
240 return FPI->getParentPad();
241 return cast<CatchSwitchInst>(Val: EHPad)->getParentPad();
242}
243
244using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
245
246/// Helper for getUnwindDestToken that does the descendant-ward part of
247/// the search.
248static Value *getUnwindDestTokenHelper(Instruction *EHPad,
249 UnwindDestMemoTy &MemoMap) {
250 SmallVector<Instruction *, 8> Worklist(1, EHPad);
251
252 while (!Worklist.empty()) {
253 Instruction *CurrentPad = Worklist.pop_back_val();
254 // We only put pads on the worklist that aren't in the MemoMap. When
255 // we find an unwind dest for a pad we may update its ancestors, but
256 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
257 // so they should never get updated while queued on the worklist.
258 assert(!MemoMap.count(CurrentPad));
259 Value *UnwindDestToken = nullptr;
260 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) {
261 if (CatchSwitch->hasUnwindDest()) {
262 UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
263 } else {
264 // Catchswitch doesn't have a 'nounwind' variant, and one might be
265 // annotated as "unwinds to caller" when really it's nounwind (see
266 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
267 // parent's unwind dest from this. We can check its catchpads'
268 // descendants, since they might include a cleanuppad with an
269 // "unwinds to caller" cleanupret, which can be trusted.
270 for (auto HI = CatchSwitch->handler_begin(),
271 HE = CatchSwitch->handler_end();
272 HI != HE && !UnwindDestToken; ++HI) {
273 BasicBlock *HandlerBlock = *HI;
274 auto *CatchPad = cast<CatchPadInst>(Val: HandlerBlock->getFirstNonPHI());
275 for (User *Child : CatchPad->users()) {
276 // Intentionally ignore invokes here -- since the catchswitch is
277 // marked "unwind to caller", it would be a verifier error if it
278 // contained an invoke which unwinds out of it, so any invoke we'd
279 // encounter must unwind to some child of the catch.
280 if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child))
281 continue;
282
283 Instruction *ChildPad = cast<Instruction>(Val: Child);
284 auto Memo = MemoMap.find(Val: ChildPad);
285 if (Memo == MemoMap.end()) {
286 // Haven't figured out this child pad yet; queue it.
287 Worklist.push_back(Elt: ChildPad);
288 continue;
289 }
290 // We've already checked this child, but might have found that
291 // it offers no proof either way.
292 Value *ChildUnwindDestToken = Memo->second;
293 if (!ChildUnwindDestToken)
294 continue;
295 // We already know the child's unwind dest, which can either
296 // be ConstantTokenNone to indicate unwind to caller, or can
297 // be another child of the catchpad. Only the former indicates
298 // the unwind dest of the catchswitch.
299 if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) {
300 UnwindDestToken = ChildUnwindDestToken;
301 break;
302 }
303 assert(getParentPad(ChildUnwindDestToken) == CatchPad);
304 }
305 }
306 }
307 } else {
308 auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad);
309 for (User *U : CleanupPad->users()) {
310 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) {
311 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
312 UnwindDestToken = RetUnwindDest->getFirstNonPHI();
313 else
314 UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext());
315 break;
316 }
317 Value *ChildUnwindDestToken;
318 if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) {
319 ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
320 } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) {
321 Instruction *ChildPad = cast<Instruction>(Val: U);
322 auto Memo = MemoMap.find(Val: ChildPad);
323 if (Memo == MemoMap.end()) {
324 // Haven't resolved this child yet; queue it and keep searching.
325 Worklist.push_back(Elt: ChildPad);
326 continue;
327 }
328 // We've checked this child, but still need to ignore it if it
329 // had no proof either way.
330 ChildUnwindDestToken = Memo->second;
331 if (!ChildUnwindDestToken)
332 continue;
333 } else {
334 // Not a relevant user of the cleanuppad
335 continue;
336 }
337 // In a well-formed program, the child/invoke must either unwind to
338 // an(other) child of the cleanup, or exit the cleanup. In the
339 // first case, continue searching.
340 if (isa<Instruction>(Val: ChildUnwindDestToken) &&
341 getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad)
342 continue;
343 UnwindDestToken = ChildUnwindDestToken;
344 break;
345 }
346 }
347 // If we haven't found an unwind dest for CurrentPad, we may have queued its
348 // children, so move on to the next in the worklist.
349 if (!UnwindDestToken)
350 continue;
351
352 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
353 // any ancestors of CurrentPad up to but not including UnwindDestToken's
354 // parent pad. Record this in the memo map, and check to see if the
355 // original EHPad being queried is one of the ones exited.
356 Value *UnwindParent;
357 if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken))
358 UnwindParent = getParentPad(EHPad: UnwindPad);
359 else
360 UnwindParent = nullptr;
361 bool ExitedOriginalPad = false;
362 for (Instruction *ExitedPad = CurrentPad;
363 ExitedPad && ExitedPad != UnwindParent;
364 ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) {
365 // Skip over catchpads since they just follow their catchswitches.
366 if (isa<CatchPadInst>(Val: ExitedPad))
367 continue;
368 MemoMap[ExitedPad] = UnwindDestToken;
369 ExitedOriginalPad |= (ExitedPad == EHPad);
370 }
371
372 if (ExitedOriginalPad)
373 return UnwindDestToken;
374
375 // Continue the search.
376 }
377
378 // No definitive information is contained within this funclet.
379 return nullptr;
380}
381
382/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
383/// return that pad instruction. If it unwinds to caller, return
384/// ConstantTokenNone. If it does not have a definitive unwind destination,
385/// return nullptr.
386///
387/// This routine gets invoked for calls in funclets in inlinees when inlining
388/// an invoke. Since many funclets don't have calls inside them, it's queried
389/// on-demand rather than building a map of pads to unwind dests up front.
390/// Determining a funclet's unwind dest may require recursively searching its
391/// descendants, and also ancestors and cousins if the descendants don't provide
392/// an answer. Since most funclets will have their unwind dest immediately
393/// available as the unwind dest of a catchswitch or cleanupret, this routine
394/// searches top-down from the given pad and then up. To avoid worst-case
395/// quadratic run-time given that approach, it uses a memo map to avoid
396/// re-processing funclet trees. The callers that rewrite the IR as they go
397/// take advantage of this, for correctness, by checking/forcing rewritten
398/// pads' entries to match the original callee view.
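/// Illustrative sketch (hypothetical IR, not taken from a real test case):
///   %outer = cleanuppad within none []
///   ...
///   %inner = cleanuppad within %outer []
///   cleanupret from %inner unwind to caller
/// Querying %inner (or %outer) finds the "unwind to caller" cleanupret while
/// searching descendants, so ConstantTokenNone is returned and memoized for
/// both pads; only when no descendant yields an answer does the search
/// continue through ancestors and cousins.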
399static Value *getUnwindDestToken(Instruction *EHPad,
400 UnwindDestMemoTy &MemoMap) {
401 // Catchpads unwind to the same place as their catchswitch;
402 // redirect any queries on catchpads so the code below can
403 // deal with just catchswitches and cleanuppads.
404 if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad))
405 EHPad = CPI->getCatchSwitch();
406
407 // Check if we've already determined the unwind dest for this pad.
408 auto Memo = MemoMap.find(Val: EHPad);
409 if (Memo != MemoMap.end())
410 return Memo->second;
411
412 // Search EHPad and, if necessary, its descendants.
413 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
414 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
415 if (UnwindDestToken)
416 return UnwindDestToken;
417
418 // No information is available for this EHPad from itself or any of its
419 // descendants. An unwind all the way out to a pad in the caller would
420 // also need to agree with the unwind dest of the parent funclet, so
421 // search up the chain to try to find a funclet with information. Put
422 // null entries in the memo map to avoid re-processing as we go up.
423 MemoMap[EHPad] = nullptr;
424#ifndef NDEBUG
425 SmallPtrSet<Instruction *, 4> TempMemos;
426 TempMemos.insert(EHPad);
427#endif
428 Instruction *LastUselessPad = EHPad;
429 Value *AncestorToken;
430 for (AncestorToken = getParentPad(EHPad);
431 auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken);
432 AncestorToken = getParentPad(EHPad: AncestorToken)) {
433 // Skip over catchpads since they just follow their catchswitches.
434 if (isa<CatchPadInst>(Val: AncestorPad))
435 continue;
436 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
437 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
438 // call to getUnwindDestToken, that would mean that AncestorPad had no
439 // information in itself, its descendants, or its ancestors. If that
440 // were the case, then we should also have recorded the lack of information
441 // for the descendant that we're coming from. So assert that we don't
442 // find a null entry in the MemoMap for AncestorPad.
443 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
444 auto AncestorMemo = MemoMap.find(Val: AncestorPad);
445 if (AncestorMemo == MemoMap.end()) {
446 UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap);
447 } else {
448 UnwindDestToken = AncestorMemo->second;
449 }
450 if (UnwindDestToken)
451 break;
452 LastUselessPad = AncestorPad;
453 MemoMap[LastUselessPad] = nullptr;
454#ifndef NDEBUG
455 TempMemos.insert(LastUselessPad);
456#endif
457 }
458
459 // We know that getUnwindDestTokenHelper was called on LastUselessPad and
460 // returned nullptr (and likewise for EHPad and any of its ancestors up to
461 // LastUselessPad), so LastUselessPad has no information from below. Since
462 // getUnwindDestTokenHelper must investigate all downward paths through
463 // no-information nodes to prove that a node has no information like this,
464 // and since any time it finds information it records it in the MemoMap for
465 // not just the immediately-containing funclet but also any ancestors also
466 // exited, it must be the case that, walking downward from LastUselessPad,
467 // visiting just those nodes which have not been mapped to an unwind dest
468 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
469 // they are just used to keep getUnwindDestTokenHelper from repeating work),
470 // any node visited must have been exhaustively searched with no information
471 // for it found.
472 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
473 while (!Worklist.empty()) {
474 Instruction *UselessPad = Worklist.pop_back_val();
475 auto Memo = MemoMap.find(Val: UselessPad);
476 if (Memo != MemoMap.end() && Memo->second) {
477 // Here the name 'UselessPad' is a bit of a misnomer, because we've found
478 // that it is a funclet that does have information about unwinding to
479 // a particular destination; its parent was a useless pad.
480 // Since its parent has no information, the unwind edge must not escape
481 // the parent, and must target a sibling of this pad. This local unwind
482 // gives us no information about EHPad. Leave it and the subtree rooted
483 // at it alone.
484 assert(getParentPad(Memo->second) == getParentPad(UselessPad));
485 continue;
486 }
487 // We know we don't have information for UselessPad. If it has an entry in
488 // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
489 // added on this invocation of getUnwindDestToken; if a previous invocation
490 // recorded nullptr, it would have had to prove that the ancestors of
491 // UselessPad, which include LastUselessPad, had no information, and that
492 // in turn would have required proving that the descendants of
493 // LastUselessPad, which include EHPad, have no information about
494 // LastUselessPad, which would imply that EHPad was mapped to nullptr in
495 // the MemoMap on that invocation, which isn't the case if we got here.
496 assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
497 // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
498 // information that we'd be contradicting by making a map entry for it
499 // (which is something that getUnwindDestTokenHelper must have proved for
500 // us to get here). Just assert on its direct users here; the checks in
501 // this downward walk at its descendants will verify that they don't have
502 // any unwind edges that exit 'UselessPad' either (i.e. they either have no
503 // unwind edges or unwind to a sibling).
504 MemoMap[UselessPad] = UnwindDestToken;
505 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) {
506 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
507 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
508 auto *CatchPad = HandlerBlock->getFirstNonPHI();
509 for (User *U : CatchPad->users()) {
510 assert(
511 (!isa<InvokeInst>(U) ||
512 (getParentPad(
513 cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
514 CatchPad)) &&
515 "Expected useless pad");
516 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
517 Worklist.push_back(Elt: cast<Instruction>(Val: U));
518 }
519 }
520 } else {
521 assert(isa<CleanupPadInst>(UselessPad));
522 for (User *U : UselessPad->users()) {
523 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
524 assert((!isa<InvokeInst>(U) ||
525 (getParentPad(
526 cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
527 UselessPad)) &&
528 "Expected useless pad");
529 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
530 Worklist.push_back(Elt: cast<Instruction>(Val: U));
531 }
532 }
533 }
534
535 return UnwindDestToken;
536}
537
538/// When we inline a basic block into an invoke,
539/// we have to turn all of the calls that can throw into invokes.
540/// This function analyzes BB to see if there are any such calls, and if so,
541/// it rewrites them to be invokes that unwind to UnwindEdge; the caller is
542/// then responsible for filling in the unwind destination's PHI nodes.
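/// Note that only the first such call found in BB is rewritten per invocation:
/// the block is split at that call and returned, and the callers' block-level
/// iteration subsequently visits the split-off remainder. A nullptr return
/// means no call in BB needed rewriting.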
543static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
544 BasicBlock *BB, BasicBlock *UnwindEdge,
545 UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
546 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
547 // We only need to check for function calls: inlined invoke
548 // instructions require no special handling.
549 CallInst *CI = dyn_cast<CallInst>(Val: &I);
550
551 if (!CI || CI->doesNotThrow())
552 continue;
553
554 // We do not need to (and in fact, cannot) convert possibly throwing calls
555 // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
556 // invokes. The caller's "segment" of the deoptimization continuation
557 // attached to the newly inlined @llvm.experimental.deoptimize
558 // (resp. @llvm.experimental.guard) call should contain the exception
559 // handling logic, if any.
560 if (auto *F = CI->getCalledFunction())
561 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
562 F->getIntrinsicID() == Intrinsic::experimental_guard)
563 continue;
564
565 if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) {
566 // This call is nested inside a funclet. If that funclet has an unwind
567 // destination within the inlinee, then unwinding out of this call would
568 // be UB. Rewriting this call to an invoke which targets the inlined
569 // invoke's unwind dest would give the call's parent funclet multiple
570 // unwind destinations, which is something that subsequent EH table
571 // generation can't handle and that the verifier rejects. So when we
572 // see such a call, leave it as a call.
573 auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]);
574 Value *UnwindDestToken =
575 getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap);
576 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
577 continue;
578#ifndef NDEBUG
579 Instruction *MemoKey;
580 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
581 MemoKey = CatchPad->getCatchSwitch();
582 else
583 MemoKey = FuncletPad;
584 assert(FuncletUnwindMap->count(MemoKey) &&
585 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
586 "must get memoized to avoid confusing later searches");
587#endif // NDEBUG
588 }
589
590 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
591 return BB;
592 }
593 return nullptr;
594}
595
596/// If we inlined an invoke site, we need to convert calls
597/// in the body of the inlined function into invokes.
598///
599/// II is the invoke instruction being inlined. FirstNewBlock is the first
600/// block of the inlined code (the last block is the end of the function),
601// and InlinedCodeInfo is information about the code that got inlined.
602static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
603 ClonedCodeInfo &InlinedCodeInfo) {
604 BasicBlock *InvokeDest = II->getUnwindDest();
605
606 Function *Caller = FirstNewBlock->getParent();
607
608 // The inlined code is currently at the end of the function; scan from the
609 // start of the inlined code to its end, checking for anything we need to
610 // rewrite.
611 LandingPadInliningInfo Invoke(II);
612
613 // Get all of the inlined landing pad instructions.
614 SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
615 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
616 I != E; ++I)
617 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator()))
618 InlinedLPads.insert(Ptr: II->getLandingPadInst());
619
620 // Append the clauses from the outer landing pad instruction into the inlined
621 // landing pad instructions.
622 LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
623 for (LandingPadInst *InlinedLPad : InlinedLPads) {
624 unsigned OuterNum = OuterLPad->getNumClauses();
625 InlinedLPad->reserveClauses(Size: OuterNum);
626 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
627 InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx));
628 if (OuterLPad->isCleanup())
629 InlinedLPad->setCleanup(true);
630 }
631
632 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
633 BB != E; ++BB) {
634 if (InlinedCodeInfo.ContainsCalls)
635 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
636 BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest()))
637 // Update any PHI nodes in the exceptional block to indicate that there
638 // is now a new entry in them.
639 Invoke.addIncomingPHIValuesFor(BB: NewBB);
640
641 // Forward any resumes that are remaining here.
642 if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator()))
643 Invoke.forwardResume(RI, InlinedLPads);
644 }
645
646 // Now that everything is happy, we have one final detail. The PHI nodes in
647 // the exception destination block still have entries due to the original
648 // invoke instruction. Eliminate these entries (which might even delete the
649 // PHI node) now.
650 InvokeDest->removePredecessor(Pred: II->getParent());
651}
652
653/// If we inlined an invoke site, we need to convert calls
654/// in the body of the inlined function into invokes.
655///
656/// II is the invoke instruction being inlined. FirstNewBlock is the first
657/// block of the inlined code (the last block is the end of the function),
658// and InlinedCodeInfo is information about the code that got inlined.
659static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
660 ClonedCodeInfo &InlinedCodeInfo) {
661 BasicBlock *UnwindDest = II->getUnwindDest();
662 Function *Caller = FirstNewBlock->getParent();
663
664 assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");
665
666 // If there are PHI nodes in the unwind destination block, we need to keep
667 // track of which values came into them from the invoke before removing the
668 // edge from this block.
669 SmallVector<Value *, 8> UnwindDestPHIValues;
670 BasicBlock *InvokeBB = II->getParent();
671 for (PHINode &PHI : UnwindDest->phis()) {
672 // Save the value to use for this edge.
673 UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB));
674 }
675
676 // Add incoming-PHI values to the unwind destination block for the given basic
677 // block, using the values for the original invoke's source block.
678 auto UpdatePHINodes = [&](BasicBlock *Src) {
679 BasicBlock::iterator I = UnwindDest->begin();
680 for (Value *V : UnwindDestPHIValues) {
681 PHINode *PHI = cast<PHINode>(Val&: I);
682 PHI->addIncoming(V, BB: Src);
683 ++I;
684 }
685 };
686
687 // This connects all the instructions which 'unwind to caller' to the invoke
688 // destination.
689 UnwindDestMemoTy FuncletUnwindMap;
690 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
691 BB != E; ++BB) {
692 if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) {
693 if (CRI->unwindsToCaller()) {
694 auto *CleanupPad = CRI->getCleanupPad();
695 CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator());
696 CRI->eraseFromParent();
697 UpdatePHINodes(&*BB);
698 // Finding a cleanupret with an unwind destination would confuse
699 // subsequent calls to getUnwindDestToken, so map the cleanuppad
700 // to short-circuit any such calls and recognize this as an "unwind
701 // to caller" cleanup.
702 assert(!FuncletUnwindMap.count(CleanupPad) ||
703 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
704 FuncletUnwindMap[CleanupPad] =
705 ConstantTokenNone::get(Context&: Caller->getContext());
706 }
707 }
708
709 Instruction *I = BB->getFirstNonPHI();
710 if (!I->isEHPad())
711 continue;
712
713 Instruction *Replacement = nullptr;
714 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: I)) {
715 if (CatchSwitch->unwindsToCaller()) {
716 Value *UnwindDestToken;
717 if (auto *ParentPad =
718 dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) {
719 // This catchswitch is nested inside another funclet. If that
720 // funclet has an unwind destination within the inlinee, then
721 // unwinding out of this catchswitch would be UB. Rewriting this
722 // catchswitch to unwind to the inlined invoke's unwind dest would
723 // give the parent funclet multiple unwind destinations, which is
724 // something that subsequent EH table generation can't handle and
725 // that the verifier rejects. So when we see such a catchswitch, leave it
726 // as "unwind to caller".
727 UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap);
728 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
729 continue;
730 } else {
731 // This catchswitch has no parent to inherit constraints from, and
732 // none of its descendants can have an unwind edge that exits it and
733 // targets another funclet in the inlinee. It may or may not have a
734 // descendant that definitively has an unwind to caller. In either
735 // case, we'll have to assume that any unwinds out of it may need to
736 // be routed to the caller, so treat it as though it has a definitive
737 // unwind to caller.
738 UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext());
739 }
740 auto *NewCatchSwitch = CatchSwitchInst::Create(
741 ParentPad: CatchSwitch->getParentPad(), UnwindDest,
742 NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(),
743 InsertBefore: CatchSwitch->getIterator());
744 for (BasicBlock *PadBB : CatchSwitch->handlers())
745 NewCatchSwitch->addHandler(Dest: PadBB);
746 // Propagate info for the old catchswitch over to the new one in
747 // the unwind map. This also serves to short-circuit any subsequent
748 // checks for the unwind dest of this catchswitch, which would get
749 // confused if they found the outer handler in the callee.
750 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
751 Replacement = NewCatchSwitch;
752 }
753 } else if (!isa<FuncletPadInst>(Val: I)) {
754 llvm_unreachable("unexpected EHPad!");
755 }
756
757 if (Replacement) {
758 Replacement->takeName(V: I);
759 I->replaceAllUsesWith(V: Replacement);
760 I->eraseFromParent();
761 UpdatePHINodes(&*BB);
762 }
763 }
764
765 if (InlinedCodeInfo.ContainsCalls)
766 for (Function::iterator BB = FirstNewBlock->getIterator(),
767 E = Caller->end();
768 BB != E; ++BB)
769 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
770 BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap))
771 // Update any PHI nodes in the exceptional block to indicate that there
772 // is now a new entry in them.
773 UpdatePHINodes(NewBB);
774
775 // Now that everything is happy, we have one final detail. The PHI nodes in
776 // the exception destination block still have entries due to the original
777 // invoke instruction. Eliminate these entries (which might even delete the
778 // PHI node) now.
779 UnwindDest->removePredecessor(Pred: InvokeBB);
780}
781
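// Return true if the two call stack contexts share a common prefix, comparing
// stack ids only up to the length of the shorter context. For example
// (illustrative), an MIB stack {1, 2, 3, 4} matches a callsite stack {1, 2},
// whereas {1, 2, 3} and {1, 5} do not match.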
782static bool haveCommonPrefix(MDNode *MIBStackContext,
783 MDNode *CallsiteStackContext) {
784 assert(MIBStackContext->getNumOperands() > 0 &&
785 CallsiteStackContext->getNumOperands() > 0);
786 // Because of the context trimming performed during matching, the callsite
787 // context could have more stack ids than the MIB. We match up to the end of
788 // the shortest stack context.
789 for (auto MIBStackIter = MIBStackContext->op_begin(),
790 CallsiteStackIter = CallsiteStackContext->op_begin();
791 MIBStackIter != MIBStackContext->op_end() &&
792 CallsiteStackIter != CallsiteStackContext->op_end();
793 MIBStackIter++, CallsiteStackIter++) {
794 auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter);
795 auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter);
796 assert(Val1 && Val2);
797 if (Val1->getZExtValue() != Val2->getZExtValue())
798 return false;
799 }
800 return true;
801}
802
803static void removeMemProfMetadata(CallBase *Call) {
804 Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
805}
806
807static void removeCallsiteMetadata(CallBase *Call) {
808 Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
809}
810
811static void updateMemprofMetadata(CallBase *CI,
812 const std::vector<Metadata *> &MIBList) {
813 assert(!MIBList.empty());
814 // Remove existing memprof, which will either be replaced or may not be needed
815 // if we are able to use a single allocation type function attribute.
816 removeMemProfMetadata(Call: CI);
817 CallStackTrie CallStack;
818 for (Metadata *MIB : MIBList)
819 CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB));
820 bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
821 assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
822 if (!MemprofMDAttached)
823 // If we used a function attribute remove the callsite metadata as well.
824 removeCallsiteMetadata(Call: CI);
825}
826
827// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
828// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
829// the call that was inlined.
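// Illustrative sketch: if the callee-side call carried !callsite !{!id1, !id2}
// and the call being inlined carried !callsite !{!id3}, the cloned call's
// context becomes the concatenation !{!id1, !id2, !id3}; MIBs whose stack
// context does not share a prefix with that concatenated context are dropped
// from the cloned call and remain represented only by the out-of-line original.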
830static void propagateMemProfHelper(const CallBase *OrigCall,
831 CallBase *ClonedCall,
832 MDNode *InlinedCallsiteMD) {
833 MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite);
834 MDNode *ClonedCallsiteMD = nullptr;
835 // Check if the call originally had callsite metadata, and update it for the
836 // new call in the inlined body.
837 if (OrigCallsiteMD) {
838 // The cloned call's context is now the concatenation of the original call's
839 // callsite metadata and the callsite metadata on the call where it was
840 // inlined.
841 ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD);
842 ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD);
843 }
844
845 // Update any memprof metadata on the cloned call.
846 MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof);
847 if (!OrigMemProfMD)
848 return;
849 // We currently expect that allocations with memprof metadata also have
850 // callsite metadata for the allocation's part of the context.
851 assert(OrigCallsiteMD);
852
853 // New call's MIB list.
854 std::vector<Metadata *> NewMIBList;
855
856 // For each MIB metadata, check if its call stack context starts with the
857 // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
858 // the inlined body. If not, it stays on the out-of-line original call.
859 for (auto &MIBOp : OrigMemProfMD->operands()) {
860 MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp);
861 // Stack is first operand of MIB.
862 MDNode *StackMD = getMIBStackNode(MIB);
863 assert(StackMD);
864 // See if the new cloned callsite context matches this profiled context.
865 if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD))
866 // Add it to the cloned call's MIB list.
867 NewMIBList.push_back(x: MIB);
868 }
869 if (NewMIBList.empty()) {
870 removeMemProfMetadata(Call: ClonedCall);
871 removeCallsiteMetadata(Call: ClonedCall);
872 return;
873 }
874 if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
875 updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList);
876}
877
878// Update memprof related metadata (!memprof and !callsite) based on the
879// inlining of Callee into the callsite at CB. The updates include merging the
880// inlined callee's callsite metadata with that of the inlined call,
881// and moving the subset of any memprof contexts to the inlined callee
882// allocations if they match the new inlined call stack.
883static void
884propagateMemProfMetadata(Function *Callee, CallBase &CB,
885 bool ContainsMemProfMetadata,
886 const ValueMap<const Value *, WeakTrackingVH> &VMap) {
887 MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite);
888 // Only need to update if the inlined callsite had callsite metadata, or if
889 // there was any memprof metadata inlined.
890 if (!CallsiteMD && !ContainsMemProfMetadata)
891 return;
892
893 // Propagate metadata onto the cloned calls in the inlined callee.
894 for (const auto &Entry : VMap) {
895 // See if this is a call that has been inlined and remapped, and not
896 // simplified away in the process.
897 auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first);
898 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second);
899 if (!OrigCall || !ClonedCall)
900 continue;
901 // If the inlined callsite did not have any callsite metadata, then it isn't
902 // involved in any profiled call contexts, and we can remove any memprof
903 // metadata on the cloned call.
904 if (!CallsiteMD) {
905 removeMemProfMetadata(Call: ClonedCall);
906 removeCallsiteMetadata(Call: ClonedCall);
907 continue;
908 }
909 propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD);
910 }
911}
912
913/// When inlining a call site that has !llvm.mem.parallel_loop_access,
914/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
915/// be propagated to all memory-accessing cloned instructions.
916static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
917 Function::iterator FEnd) {
918 MDNode *MemParallelLoopAccess =
919 CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
920 MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group);
921 MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope);
922 MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias);
923 if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
924 return;
925
926 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
927 for (Instruction &I : BB) {
928 // This metadata is only relevant for instructions that access memory.
929 if (!I.mayReadOrWriteMemory())
930 continue;
931
932 if (MemParallelLoopAccess) {
933 // TODO: This probably should not overwrite MemParallelLoopAccess.
934 MemParallelLoopAccess = MDNode::concatenate(
935 A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access),
936 B: MemParallelLoopAccess);
937 I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access,
938 Node: MemParallelLoopAccess);
939 }
940
941 if (AccessGroup)
942 I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups(
943 AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup));
944
945 if (AliasScope)
946 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate(
947 A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope));
948
949 if (NoAlias)
950 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate(
951 A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias));
952 }
953 }
954}
955
956/// Bundle operands of the inlined function must be added to inlined call sites.
957static void PropagateOperandBundles(Function::iterator InlinedBB,
958 Instruction *CallSiteEHPad) {
959 for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) {
960 CallBase *I = dyn_cast<CallBase>(Val: &II);
961 if (!I)
962 continue;
963 // Skip call sites which already have a "funclet" bundle.
964 if (I->getOperandBundle(ID: LLVMContext::OB_funclet))
965 continue;
966 // Skip call sites which are nounwind intrinsics (as long as they don't
967 // lower into regular function calls in the course of IR transformations).
968 auto *CalledFn =
969 dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts());
970 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
971 !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID()))
972 continue;
973
974 SmallVector<OperandBundleDef, 1> OpBundles;
975 I->getOperandBundlesAsDefs(Defs&: OpBundles);
976 OpBundles.emplace_back(Args: "funclet", Args&: CallSiteEHPad);
977
978 Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator());
979 NewInst->takeName(V: I);
980 I->replaceAllUsesWith(V: NewInst);
981 I->eraseFromParent();
982 }
983}
984
985namespace {
986/// Utility for cloning !noalias and !alias.scope metadata. When a code region
987/// using scoped alias metadata is inlined, the aliasing relationships may not
988/// hold between the two versions. It is necessary to create a deep clone of the
989/// metadata, putting the two versions in separate scope domains.
990class ScopedAliasMetadataDeepCloner {
991 using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
992 SetVector<const MDNode *> MD;
993 MetadataMap MDMap;
994 void addRecursiveMetadataUses();
995
996public:
997 ScopedAliasMetadataDeepCloner(const Function *F);
998
999 /// Create a new clone of the scoped alias metadata, which will be used by
1000 /// subsequent remap() calls.
1001 void clone();
1002
1003 /// Remap instructions in the given range from the original to the cloned
1004 /// metadata.
1005 void remap(Function::iterator FStart, Function::iterator FEnd);
1006};
1007} // namespace
1008
1009ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1010 const Function *F) {
1011 for (const BasicBlock &BB : *F) {
1012 for (const Instruction &I : BB) {
1013 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1014 MD.insert(X: M);
1015 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1016 MD.insert(X: M);
1017
1018 // We also need to clone the metadata in noalias intrinsics.
1019 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1020 MD.insert(X: Decl->getScopeList());
1021 }
1022 }
1023 addRecursiveMetadataUses();
1024}
1025
1026void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1027 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1028 while (!Queue.empty()) {
1029 const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val());
1030 for (const Metadata *Op : M->operands())
1031 if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op))
1032 if (MD.insert(X: OpMD))
1033 Queue.push_back(Elt: OpMD);
1034 }
1035}
1036
1037void ScopedAliasMetadataDeepCloner::clone() {
1038 assert(MDMap.empty() && "clone() already called ?");
1039
1040 SmallVector<TempMDTuple, 16> DummyNodes;
1041 for (const MDNode *I : MD) {
1042 DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: std::nullopt));
1043 MDMap[I].reset(MD: DummyNodes.back().get());
1044 }
1045
1046 // Create new metadata nodes to replace the dummy nodes, replacing old
1047 // metadata references with either a dummy node or an already-created new
1048 // node.
1049 SmallVector<Metadata *, 4> NewOps;
1050 for (const MDNode *I : MD) {
1051 for (const Metadata *Op : I->operands()) {
1052 if (const MDNode *M = dyn_cast<MDNode>(Val: Op))
1053 NewOps.push_back(Elt: MDMap[M]);
1054 else
1055 NewOps.push_back(Elt: const_cast<Metadata *>(Op));
1056 }
1057
1058 MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps);
1059 MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]);
1060 assert(TempM->isTemporary() && "Expected temporary node");
1061
1062 TempM->replaceAllUsesWith(MD: NewM);
1063 NewOps.clear();
1064 }
1065}
1066
1067void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1068 Function::iterator FEnd) {
1069 if (MDMap.empty())
1070 return; // Nothing to do.
1071
1072 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1073 for (Instruction &I : BB) {
1074 // TODO: The null checks for the MDMap.lookup() results should no longer
1075 // be necessary.
1076 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1077 if (MDNode *MNew = MDMap.lookup(Val: M))
1078 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew);
1079
1080 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1081 if (MDNode *MNew = MDMap.lookup(Val: M))
1082 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew);
1083
1084 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1085 if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList()))
1086 Decl->setScopeList(MNew);
1087 }
1088 }
1089}
1090
1091/// If the inlined function has noalias arguments,
1092/// then add new alias scopes for each noalias argument, tag the mapped noalias
1093/// parameters with noalias metadata specifying the new scope, and tag all
1094/// non-derived loads, stores and memory intrinsics with the new alias scopes.
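/// Illustrative sketch (hypothetical IR): for a callee
///   define void @f(ptr noalias %p, ptr %q)
/// a fresh scope is created for %p. After inlining, accesses known to be based
/// only on %p receive !alias.scope with that scope, while accesses provably
/// not based on %p (and not on a possibly-captured copy of it) receive
/// !noalias with the same scope, preserving %p's noalias guarantee in the
/// caller.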
1095static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1096 const DataLayout &DL, AAResults *CalleeAAR,
1097 ClonedCodeInfo &InlinedFunctionInfo) {
1098 if (!EnableNoAliasConversion)
1099 return;
1100
1101 const Function *CalledFunc = CB.getCalledFunction();
1102 SmallVector<const Argument *, 4> NoAliasArgs;
1103
1104 for (const Argument &Arg : CalledFunc->args())
1105 if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty())
1106 NoAliasArgs.push_back(Elt: &Arg);
1107
1108 if (NoAliasArgs.empty())
1109 return;
1110
1111 // To do a good job, if a noalias variable is captured, we need to know if
1112 // the capture point dominates the particular use we're considering.
1113 DominatorTree DT;
1114 DT.recalculate(Func&: const_cast<Function&>(*CalledFunc));
1115
1116 // noalias indicates that pointer values based on the argument do not alias
1117 // pointer values which are not based on it. So we add a new "scope" for each
1118 // noalias function argument. Accesses using pointers based on that argument
1119 // become part of that alias scope, accesses using pointers not based on that
1120 // argument are tagged as noalias with that scope.
1121
1122 DenseMap<const Argument *, MDNode *> NewScopes;
1123 MDBuilder MDB(CalledFunc->getContext());
1124
1125 // Create a new scope domain for this function.
1126 MDNode *NewDomain =
1127 MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName());
1128 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
1129 const Argument *A = NoAliasArgs[i];
1130
1131 std::string Name = std::string(CalledFunc->getName());
1132 if (A->hasName()) {
1133 Name += ": %";
1134 Name += A->getName();
1135 } else {
1136 Name += ": argument ";
1137 Name += utostr(X: i);
1138 }
1139
1140 // Note: We always create a new anonymous root here. This is true regardless
1141 // of the linkage of the callee because the aliasing "scope" is not just a
1142 // property of the callee, but also all control dependencies in the caller.
1143 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
1144 NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope));
1145
1146 if (UseNoAliasIntrinsic) {
1147 // Introduce a llvm.experimental.noalias.scope.decl for the noalias
1148 // argument.
1149 MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope);
1150 auto *NoAliasDecl =
1151 IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList);
1152 // Ignore the result for now. The result will be used when the
1153 // llvm.noalias intrinsic is introduced.
1154 (void)NoAliasDecl;
1155 }
1156 }
1157
1158 // Iterate over all new instructions in the map; for all memory-access
1159 // instructions, add the alias scope metadata.
1160 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1161 VMI != VMIE; ++VMI) {
1162 if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) {
1163 if (!VMI->second)
1164 continue;
1165
1166 Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second);
1167 if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI))
1168 continue;
1169
1170 bool IsArgMemOnlyCall = false, IsFuncCall = false;
1171 SmallVector<const Value *, 2> PtrArgs;
1172
1173 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
1174 PtrArgs.push_back(Elt: LI->getPointerOperand());
1175 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1176 PtrArgs.push_back(Elt: SI->getPointerOperand());
1177 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I))
1178 PtrArgs.push_back(Elt: VAAI->getPointerOperand());
1179 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I))
1180 PtrArgs.push_back(Elt: CXI->getPointerOperand());
1181 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I))
1182 PtrArgs.push_back(Elt: RMWI->getPointerOperand());
1183 else if (const auto *Call = dyn_cast<CallBase>(Val: I)) {
1184 // If we know that the call does not access memory, then we'll still
1185 // know that about the inlined clone of this call site, and we don't
1186 // need to add metadata.
1187 if (Call->doesNotAccessMemory())
1188 continue;
1189
1190 IsFuncCall = true;
1191 if (CalleeAAR) {
1192 MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
1193
1194 // We'll retain this knowledge without additional metadata.
1195 if (ME.onlyAccessesInaccessibleMem())
1196 continue;
1197
1198 if (ME.onlyAccessesArgPointees())
1199 IsArgMemOnlyCall = true;
1200 }
1201
1202 for (Value *Arg : Call->args()) {
1203 // Only care about pointer arguments. If a noalias argument is
1204 // accessed through a non-pointer argument, it must be captured
1205 // first (e.g. via ptrtoint), and we protect against captures below.
1206 if (!Arg->getType()->isPointerTy())
1207 continue;
1208
1209 PtrArgs.push_back(Elt: Arg);
1210 }
1211 }
1212
1213 // If we found no pointers, then this instruction is not suitable for
1214 // pairing with an instruction to receive aliasing metadata.
1215 // However, if this is a call, then we might just alias with none of the
1216 // noalias arguments.
1217 if (PtrArgs.empty() && !IsFuncCall)
1218 continue;
1219
1220 // It is possible that there is only one underlying object, but that it
1221 // is reached through several PHIs and thus appears more than once in
1222 // the Objects list.
1223 SmallPtrSet<const Value *, 4> ObjSet;
1224 SmallVector<Metadata *, 4> Scopes, NoAliases;
1225
1226 for (const Value *V : PtrArgs) {
1227 SmallVector<const Value *, 4> Objects;
1228 getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
1229
1230 for (const Value *O : Objects)
1231 ObjSet.insert(Ptr: O);
1232 }
1233
1234 // Figure out if we're derived from anything that is not a noalias
1235 // argument.
1236 bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
1237 UsesUnknownObject = false;
1238 for (const Value *V : ObjSet) {
1239 // Is this value a constant that cannot be derived from any pointer
1240 // value (we need to exclude constant expressions, for example, that
1241 // are formed from arithmetic on global symbols).
1242 bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) ||
1243 isa<ConstantPointerNull>(Val: V) ||
1244 isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V);
1245 if (IsNonPtrConst)
1246 continue;
1247
1248 // If this is anything other than a noalias argument, then we cannot
1249 // completely describe the aliasing properties using alias.scope
1250 // metadata (and, thus, won't add any).
1251 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
1252 if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias))
1253 UsesAliasingPtr = true;
1254 } else {
1255 UsesAliasingPtr = true;
1256 }
1257
1258 if (isEscapeSource(V)) {
1259 // An escape source can only alias with a noalias argument if it has
1260 // been captured beforehand.
1261 RequiresNoCaptureBefore = true;
1262 } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) {
1263 // If this is neither an escape source, nor some identified object
1264 // (which cannot directly alias a noalias argument), nor some other
1265 // argument (which, by definition, also cannot alias a noalias
1266 // argument), conservatively do not make any assumptions.
1267 UsesUnknownObject = true;
1268 }
1269 }
1270
1271 // Nothing we can do if the used underlying object cannot be reliably
1272 // determined.
1273 if (UsesUnknownObject)
1274 continue;
1275
1276 // A function call can always get captured noalias pointers (via other
1277 // parameters, globals, etc.).
1278 if (IsFuncCall && !IsArgMemOnlyCall)
1279 RequiresNoCaptureBefore = true;
1280
1281 // First, we want to figure out all of the sets with which we definitely
1282 // don't alias. Iterate over all noalias sets, and add those for which:
1283 // 1. The noalias argument is not in the set of objects from which we
1284 // definitely derive.
1285 // 2. The noalias argument has not yet been captured.
1286 // An arbitrary function that might load pointers could see captured
1287 // noalias arguments via other noalias arguments or globals, and so we
1288 // must always check for prior capture.
1289 for (const Argument *A : NoAliasArgs) {
1290 if (ObjSet.contains(Ptr: A))
1291 continue; // May be based on a noalias argument.
1292
1293 // It might be tempting to skip the PointerMayBeCapturedBefore check if
1294 // A->hasNoCaptureAttr() is true, but this is incorrect because
1295 // nocapture only guarantees that no copies outlive the function, not
1296 // that the value cannot be locally captured.
1297 if (!RequiresNoCaptureBefore ||
1298 !PointerMayBeCapturedBefore(V: A, /* ReturnCaptures */ false,
1299 /* StoreCaptures */ false, I, DT: &DT))
1300 NoAliases.push_back(Elt: NewScopes[A]);
1301 }
1302
1303 if (!NoAliases.empty())
1304 NI->setMetadata(KindID: LLVMContext::MD_noalias,
1305 Node: MDNode::concatenate(
1306 A: NI->getMetadata(KindID: LLVMContext::MD_noalias),
1307 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases)));
1308
1309 // Next, we want to figure out all of the sets to which we might belong.
1310 // We might belong to a set if the noalias argument is in the set of
1311 // underlying objects. If there is some non-noalias argument in our list
1312 // of underlying objects, then we cannot add a scope because the fact
1313 // that some access does not alias with any set of our noalias arguments
1314 // cannot itself guarantee that it does not alias with this access
1315 // (because there is some pointer of unknown origin involved and the
1316 // other access might also depend on this pointer). We also cannot add
1317 // scopes to arbitrary functions unless we know they don't access any
1318 // non-parameter pointer-values.
1319 bool CanAddScopes = !UsesAliasingPtr;
1320 if (CanAddScopes && IsFuncCall)
1321 CanAddScopes = IsArgMemOnlyCall;
1322
1323 if (CanAddScopes)
1324 for (const Argument *A : NoAliasArgs) {
1325 if (ObjSet.count(Ptr: A))
1326 Scopes.push_back(Elt: NewScopes[A]);
1327 }
1328
1329 if (!Scopes.empty())
1330 NI->setMetadata(
1331 KindID: LLVMContext::MD_alias_scope,
1332 Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope),
1333 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes)));
1334 }
1335 }
1336}
1337
1338static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1339 ReturnInst *End) {
1340
1341 assert(Begin->getParent() == End->getParent() &&
1342 "Expected to be in same basic block!");
1343 auto BeginIt = Begin->getIterator();
1344 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1345 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1346 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1347}
1348
1349// Add attributes from CB params and Fn attributes that can always be propagated
1350// to the corresponding argument / inner callbases.
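//
// Illustrative sketch (hypothetical IR, not taken from any particular test):
//
//   define void @callee(ptr %p) {
//     call void @inner(ptr %p)
//     ret void
//   }
//   ...
//   call void @callee(ptr readonly %q)
//
// Because the pointer passed to @inner has the callee parameter %p as its
// underlying object, and the call site marks that parameter readonly, the
// cloned call to @inner may get readonly added to that operand after inlining.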
1351static void AddParamAndFnBasicAttributes(const CallBase &CB,
1352 ValueToValueMapTy &VMap) {
1353 auto *CalledFunction = CB.getCalledFunction();
1354 auto &Context = CalledFunction->getContext();
1355
1356 // Collect valid attributes for all params.
1357 SmallVector<AttrBuilder> ValidParamAttrs;
1358 bool HasAttrToPropagate = false;
1359
1360 for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1361 ValidParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1362 // Access attributes can be propagated to any param with the same underlying
1363 // object as the argument.
1364 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone))
1365 ValidParamAttrs.back().addAttribute(Val: Attribute::ReadNone);
1366 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly))
1367 ValidParamAttrs.back().addAttribute(Val: Attribute::ReadOnly);
1368 HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
1369 }
1370
1371 // Won't be able to propagate anything.
1372 if (!HasAttrToPropagate)
1373 return;
1374
1375 for (BasicBlock &BB : *CalledFunction) {
1376 for (Instruction &Ins : BB) {
1377 const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins);
1378 if (!InnerCB)
1379 continue;
1380 auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB));
1381 if (!NewInnerCB)
1382 continue;
1383 AttributeList AL = NewInnerCB->getAttributes();
1384 for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1385 // Check if the underlying value for the parameter is an argument.
1386 const Value *UnderlyingV =
1387 getUnderlyingObject(V: InnerCB->getArgOperand(i: I));
1388 const Argument *Arg = dyn_cast<Argument>(Val: UnderlyingV);
1389 if (!Arg)
1390 continue;
1391
1392 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ByVal))
1393 // It's unsound to propagate memory attributes to byval arguments.
1394 // Even if CalledFunction doesn't e.g. write to the argument,
1395 // the call to NewInnerCB may write to its by-value copy.
1396 continue;
1397
1398 unsigned ArgNo = Arg->getArgNo();
1399 // If so, propagate its access attributes.
1400 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidParamAttrs[ArgNo]);
1401 // We can have conflicting attributes from the inner callsite and
1402 // to-be-inlined callsite. In that case, choose the most
1403 // restrictive.
1404
1405 // readonly + writeonly means we can never deref so make readnone.
1406 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) &&
1407 AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly))
1408 AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone);
1409
1410 // If we have readnone, we need to clear readonly/writeonly.
1411 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) {
1412 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly);
1413 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly);
1414 }
1415
1416 // Writable cannot exist in conjunction with readonly/readnone.
1417 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) ||
1418 AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone))
1419 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable);
1420 }
1421 NewInnerCB->setAttributes(AL);
1422 }
1423 }
1424}
1425
1426// Only allow these whitelisted attributes to be propagated back to the
1427// callee. This is because other attributes may only be valid on the call
1428// itself, e.g. attributes such as signext and zeroext.
1429
1430// Attributes that are always okay to propagate, because violating them is
1431// immediate UB.
1432static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1433 AttrBuilder Valid(CB.getContext());
1434 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1435 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1436 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1437 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1438 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1439 Valid.addAttribute(Val: Attribute::NoAlias);
1440 if (CB.hasRetAttr(Kind: Attribute::NoUndef))
1441 Valid.addAttribute(Val: Attribute::NoUndef);
1442 return Valid;
1443}
1444
1445// Attributes that need additional checks as propagating them may change
1446// behavior or cause new UB.
1447static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1448 AttrBuilder Valid(CB.getContext());
1449 if (CB.hasRetAttr(Kind: Attribute::NonNull))
1450 Valid.addAttribute(Val: Attribute::NonNull);
1451 if (CB.hasRetAttr(Kind: Attribute::Alignment))
1452 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1453 if (std::optional<ConstantRange> Range = CB.getRange())
1454 Valid.addRangeAttr(CR: *Range);
1455 return Valid;
1456}
1457
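// As a hedged example: for a hypothetical call site
//   %v = call noundef nonnull align 8 dereferenceable(16) ptr @callee()
// dereferenceable(16) and noundef are collected by
// IdentifyValidUBGeneratingAttributes (violating them is immediate UB), while
// nonnull and align 8 are collected by IdentifyValidPoisonGeneratingAttributes
// and are only propagated after the additional checks in AddReturnAttributes
// below.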
1458static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
1459 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1460 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1461 if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1462 return;
1463 auto *CalledFunction = CB.getCalledFunction();
1464 auto &Context = CalledFunction->getContext();
1465
1466 for (auto &BB : *CalledFunction) {
1467 auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
1468 if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
1469 continue;
1470 auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
1471 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1472 // add the attributes on the cloned RetVal. Simplification during inlining
1473 // could have transformed the cloned instruction.
1474 auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal));
1475 if (!NewRetVal)
1476 continue;
1477 // Backward propagation of attributes to the returned value may be incorrect
1478 // if it is control flow dependent.
1479 // Consider:
1480 // @callee {
1481 // %rv = call @foo()
1482 // %rv2 = call @bar()
1483 // if (%rv2 != null)
1484 // return %rv2
1485 // if (%rv == null)
1486 // exit()
1487 // return %rv
1488 // }
1489 // caller() {
1490 // %val = call nonnull @callee()
1491 // }
1492 // Here we cannot add the nonnull attribute on either foo or bar. So, we
1493 // limit the check to both RetVal and RI are in the same basic block and
1494 // there are no throwing/exiting instructions between these instructions.
1495 if (RI->getParent() != RetVal->getParent() ||
1496 MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
1497 continue;
1498 // Add to the existing attributes of NewRetVal, i.e. the cloned call
1499 // instruction.
1500 // NB! When we have the same attribute already existing on NewRetVal, but
1501 // with a differing value, the AttributeList's merge API honours the already
1502 // existing attribute value (i.e. attributes such as dereferenceable,
1503 // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1504 AttributeList AL = NewRetVal->getAttributes();
1505 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1506 ValidUB.removeAttribute(Val: Attribute::Dereferenceable);
1507 if (ValidUB.getDereferenceableOrNullBytes() <
1508 AL.getRetDereferenceableOrNullBytes())
1509 ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1510 AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
1511 // Attributes that may generate poison returns are a bit tricky. If we
1512 // propagate them, other uses of the callsite might have their behavior
1513 // change or cause UB (if they have noundef) because of the new potential
1514 // poison.
1515 // Take the following three cases:
1516 //
1517 // 1)
1518 // define nonnull ptr @foo() {
1519 // %p = call ptr @bar()
1520 // call void @use(ptr %p) willreturn nounwind
1521 // ret ptr %p
1522 // }
1523 //
1524 // 2)
1525 // define noundef nonnull ptr @foo() {
1526 // %p = call ptr @bar()
1527 // call void @use(ptr %p) willreturn nounwind
1528 // ret ptr %p
1529 // }
1530 //
1531 // 3)
1532 // define nonnull ptr @foo() {
1533 // %p = call noundef ptr @bar()
1534 // ret ptr %p
1535 // }
1536 //
1537 // In case 1, we can't propagate nonnull because a poison value in @use may
1538 // change behavior or trigger UB.
1539 // In case 2, we don't need to be concerned about propagating nonnull, as
1540 // any new poison at @use will trigger UB anyways.
1541 // In case 3, we can never propagate nonnull because it may create UB due to
1542 // the noundef on @bar.
1543 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1544 ValidPG.removeAttribute(Val: Attribute::Alignment);
1545 if (ValidPG.hasAttributes()) {
1546 Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range);
1547 if (CBRange.isValid()) {
1548 Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range);
1549 if (NewRange.isValid()) {
1550 ValidPG.addRangeAttr(
1551 CR: CBRange.getRange().intersectWith(CR: NewRange.getRange()));
1552 }
1553 }
1554 // Three checks.
1555 // If the callsite has `noundef`, then a poison due to violating the
1556 // return attribute will create UB anyways so we can always propagate.
1557 // Otherwise, if the return value (callee to be inlined) has `noundef`, we
1558 // can't propagate as a new poison return will cause UB.
1559 // Finally, check if the return value has no uses whose behavior may
1560 // change/may cause UB if we potentially return poison. At the moment this
1561 // is implemented overly conservatively with a single-use check.
1562 // TODO: Update the single-use check to iterate through uses and only bail
1563 // if we have a potentially dangerous use.
1564
1565 if (CB.hasRetAttr(Kind: Attribute::NoUndef) ||
1566 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef)))
1567 NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
1568 }
1569 NewRetVal->setAttributes(NewAL);
1570 }
1571}
1572
1573/// If the inlined function has non-byval align arguments, then
1574/// add @llvm.assume-based alignment assumptions to preserve this information.
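///
/// As a rough sketch (the exact form depends on how IRBuilder emits the
/// assumption), inlining a callee with a parameter declared
///   ptr align 16 %p
/// may insert, just before the call site in the caller:
///   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]
/// where %arg stands for the actual argument passed at the call site.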
1575static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1576 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1577 return;
1578
1579 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1580 auto &DL = CB.getDataLayout();
1581
1582 // To avoid inserting redundant assumptions, we should check for assumptions
1583 // already in the caller. To do this, we might need a DT of the caller.
1584 DominatorTree DT;
1585 bool DTCalculated = false;
1586
1587 Function *CalledFunc = CB.getCalledFunction();
1588 for (Argument &Arg : CalledFunc->args()) {
1589 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1590 Arg.hasNUses(N: 0))
1591 continue;
1592 MaybeAlign Alignment = Arg.getParamAlign();
1593 if (!Alignment)
1594 continue;
1595
1596 if (!DTCalculated) {
1597 DT.recalculate(Func&: *CB.getCaller());
1598 DTCalculated = true;
1599 }
1600 // If we can already prove the asserted alignment in the context of the
1601 // caller, then don't bother inserting the assumption.
1602 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1603 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1604 continue;
1605
1606 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1607 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1608 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1609 }
1610}
1611
1612static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1613 Module *M, BasicBlock *InsertBlock,
1614 InlineFunctionInfo &IFI,
1615 Function *CalledFunc) {
1616 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1617
1618 Value *Size =
1619 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1620
1621 // Always generate a memcpy of alignment 1 here because we don't know
1622 // the alignment of the src pointer. Other optimizations can infer
1623 // better alignment.
1624 CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1625 /*SrcAlign*/ Align(1), Size);
1626
1627 // The verifier requires that all calls of debug-info-bearing functions
1628 // from debug-info-bearing functions have a debug location (for inlining
1629 // purposes). Assign a dummy location to satisfy the constraint.
1630 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1631 if (DISubprogram *SP = CalledFunc->getSubprogram())
1632 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1633}
1634
1635/// When inlining a call site that has a byval argument,
1636/// we have to make the implicit memcpy explicit by adding it.
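///
/// Conceptually (hypothetical IR), for a call site
///   call void @callee(ptr byval(%struct.S) %p)
/// the inlined body is redirected to a fresh caller-entry alloca:
///   %p.copy = alloca %struct.S
///   call void @llvm.memcpy.p0.p0.i64(ptr %p.copy, ptr %p, i64 <sizeof S>,
///                                    i1 false)
/// unless the callee only reads memory (and alignment allows it), in which
/// case the original pointer is reused and the copy is elided.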
1637static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1638 Instruction *TheCall,
1639 const Function *CalledFunc,
1640 InlineFunctionInfo &IFI,
1641 MaybeAlign ByValAlignment) {
1642 Function *Caller = TheCall->getFunction();
1643 const DataLayout &DL = Caller->getDataLayout();
1644
1645 // If the called function is readonly, then it could not mutate the caller's
1646 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1647 // temporary.
1648 if (CalledFunc->onlyReadsMemory()) {
1649 // If the byval argument has a specified alignment that is greater than the
1650 // passed in pointer, then we either have to round up the input pointer or
1651 // give up on this transformation.
1652 if (ByValAlignment.valueOrOne() == 1)
1653 return Arg;
1654
1655 AssumptionCache *AC =
1656 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1657
1658 // If the pointer is already known to be sufficiently aligned, or if we can
1659 // round it up to a larger alignment, then we don't need a temporary.
1660 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1661 *ByValAlignment)
1662 return Arg;
1663
1664 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1665 // for code quality, but rarely happens and is required for correctness.
1666 }
1667
1668 // Create the alloca. If we have DataLayout, use nice alignment.
1669 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1670
1671 // If the byval had an alignment specified, we *must* use at least that
1672 // alignment, as it is required by the byval argument (and uses of the
1673 // pointer inside the callee).
1674 if (ByValAlignment)
1675 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1676
1677 AllocaInst *NewAlloca =
1678 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1679 nullptr, Alignment, Arg->getName());
1680 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1681 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1682
1683 // Uses of the argument in the function should use our new alloca
1684 // instead.
1685 return NewAlloca;
1686}
1687
1688// Check whether this Value is used by a lifetime intrinsic.
1689static bool isUsedByLifetimeMarker(Value *V) {
1690 for (User *U : V->users())
1691 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U))
1692 if (II->isLifetimeStartOrEnd())
1693 return true;
1694 return false;
1695}
1696
1697// Check whether the given alloca already has
1698// lifetime.start or lifetime.end intrinsics.
1699static bool hasLifetimeMarkers(AllocaInst *AI) {
1700 Type *Ty = AI->getType();
1701 Type *Int8PtrTy =
1702 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1703 if (Ty == Int8PtrTy)
1704 return isUsedByLifetimeMarker(V: AI);
1705
1706 // Do a scan to find all the casts to i8*.
1707 for (User *U : AI->users()) {
1708 if (U->getType() != Int8PtrTy) continue;
1709 if (U->stripPointerCasts() != AI) continue;
1710 if (isUsedByLifetimeMarker(V: U))
1711 return true;
1712 }
1713 return false;
1714}
1715
1716/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1717/// block. Allocas used in inalloca calls and allocas of dynamic array size
1718/// cannot be static.
1719static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1720 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1721}
1722
1723/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1724/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1725static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1726 LLVMContext &Ctx,
1727 DenseMap<const MDNode *, MDNode *> &IANodes) {
1728 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1729 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1730 Scope: OrigDL.getScope(), InlinedAt: IA);
1731}
1732
1733/// Update inlined instructions' line numbers to encode the location where
1734/// these instructions are inlined.
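///
/// Sketch of the effect on one instruction (hypothetical metadata): a debug
/// location such as
///   DILocation(line: 5, scope: <callee scope>)
/// is rewritten to
///   DILocation(line: 5, scope: <callee scope>,
///              inlinedAt: <distinct clone of the call site's DILocation>)
/// so the instruction is still attributed to its callee source line while also
/// recording which call site it was inlined through.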
1735static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1736 Instruction *TheCall, bool CalleeHasDebugInfo) {
1737 const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1738 if (!TheCallDL)
1739 return;
1740
1741 auto &Ctx = Fn->getContext();
1742 DILocation *InlinedAtNode = TheCallDL;
1743
1744 // Create a unique call site, not to be confused with any other call from the
1745 // same location.
1746 InlinedAtNode = DILocation::getDistinct(
1747 Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
1748 Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());
1749
1750 // Cache the inlined-at nodes as they're built so they are reused; without
1751 // this, every instruction's inlined-at chain would become distinct from
1752 // the others.
1753 DenseMap<const MDNode *, MDNode *> IANodes;
1754
1755 // Check if we are not generating inline line tables and want to use
1756 // the call site location instead.
1757 bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1758
1759 // Helper-util for updating the metadata attached to an instruction.
1760 auto UpdateInst = [&](Instruction &I) {
1761 // Loop metadata needs to be updated so that the start and end locs
1762 // reference inlined-at locations.
1763 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1764 &IANodes](Metadata *MD) -> Metadata * {
1765 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
1766 return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
1767 return MD;
1768 };
1769 updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);
1770
1771 if (!NoInlineLineTables)
1772 if (DebugLoc DL = I.getDebugLoc()) {
1773 DebugLoc IDL =
1774 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
1775 I.setDebugLoc(IDL);
1776 return;
1777 }
1778
1779 if (CalleeHasDebugInfo && !NoInlineLineTables)
1780 return;
1781
1782 // If the inlined instruction has no line number, or if inline info
1783 // is not being generated, make it look as if it originates from the call
1784 // location. This is important for ((__always_inline__, __nodebug__))
1785 // functions which must use caller location for all instructions in their
1786 // function body.
1787
1788 // Don't update static allocas, as they may get moved later.
1789 if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
1790 if (allocaWouldBeStaticInEntry(AI))
1791 return;
1792
1793 // Do not force a debug loc for pseudo probes, since they do not need to
1794 // be debuggable, and also they are expected to have a zero/null dwarf
1795 // discriminator at this point which could be violated otherwise.
1796 if (isa<PseudoProbeInst>(Val: I))
1797 return;
1798
1799 I.setDebugLoc(TheCallDL);
1800 };
1801
1802 // Helper-util for updating debug-info records attached to instructions.
1803 auto UpdateDVR = [&](DbgRecord *DVR) {
1804 assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1805 if (NoInlineLineTables) {
1806 DVR->setDebugLoc(TheCallDL);
1807 return;
1808 }
1809 DebugLoc DL = DVR->getDebugLoc();
1810 DebugLoc IDL =
1811 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
1812 Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
1813 DVR->setDebugLoc(IDL);
1814 };
1815
1816 // Iterate over all instructions, updating metadata and debug-info records.
1817 for (; FI != Fn->end(); ++FI) {
1818 for (Instruction &I : *FI) {
1819 UpdateInst(I);
1820 for (DbgRecord &DVR : I.getDbgRecordRange()) {
1821 UpdateDVR(&DVR);
1822 }
1823 }
1824
1825 // Remove debug info intrinsics if we're not keeping inline info.
1826 if (NoInlineLineTables) {
1827 BasicBlock::iterator BI = FI->begin();
1828 while (BI != FI->end()) {
1829 if (isa<DbgInfoIntrinsic>(Val: BI)) {
1830 BI = BI->eraseFromParent();
1831 continue;
1832 } else {
1833 BI->dropDbgRecords();
1834 }
1835 ++BI;
1836 }
1837 }
1838 }
1839}
1840
1841#undef DEBUG_TYPE
1842#define DEBUG_TYPE "assignment-tracking"
1843/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1844static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1845 const CallBase &CB) {
1846 at::StorageToVarsMap EscapedLocals;
1847 SmallPtrSet<const Value *, 4> SeenBases;
1848
1849 LLVM_DEBUG(
1850 errs() << "# Finding caller local variables escaped by callee\n");
1851 for (const Value *Arg : CB.args()) {
1852 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1853 if (!Arg->getType()->isPointerTy()) {
1854 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1855 continue;
1856 }
1857
1858 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
1859 if (!I) {
1860 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1861 continue;
1862 }
1863
1864 // Walk back to the base storage.
1865 assert(Arg->getType()->isPtrOrPtrVectorTy());
1866 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
1867 const AllocaInst *Base = dyn_cast<AllocaInst>(
1868 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
1869 if (!Base) {
1870 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1871 continue;
1872 }
1873
1874 assert(Base);
1875 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1876 // We only need to process each base address once - skip any duplicates.
1877 if (!SeenBases.insert(Ptr: Base).second)
1878 continue;
1879
1880 // Find all local variables associated with the backing storage.
1881 auto CollectAssignsForStorage = [&](auto *DbgAssign) {
1882 // Skip variables from inlined functions - they are not local variables.
1883 if (DbgAssign->getDebugLoc().getInlinedAt())
1884 return;
1885 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1886 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
1887 };
1888 for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1889 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1890 }
1891 return EscapedLocals;
1892}
1893
1894static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1895 const CallBase &CB) {
1896 LLVM_DEBUG(errs() << "trackInlinedStores into "
1897 << Start->getParent()->getName() << " from "
1898 << CB.getCalledFunction()->getName() << "\n");
1899 std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(args: CB.getModule());
1900 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL: *DL, CB), DL: *DL);
1901}
1902
1903/// Update inlined instructions' DIAssignID metadata. We need to do this
1904/// otherwise a function inlined more than once into the same function
1905/// will cause DIAssignID to be shared by many instructions.
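///
/// Sketch (hypothetical IR): if the callee contains a store carrying a
/// !DIAssignID !1 attachment and a #dbg_assign record referencing !1, each
/// inlined copy is remapped to a fresh distinct DIAssignID node, keeping the
/// store/assignment-marker link unique to that particular inlined instance.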
1906static void fixupAssignments(Function::iterator Start, Function::iterator End) {
1907 DenseMap<DIAssignID *, DIAssignID *> Map;
1908 // Loop over all the inlined instructions. If we find a DIAssignID
1909 // attachment or use, replace it with a new version.
1910 for (auto BBI = Start; BBI != End; ++BBI) {
1911 for (Instruction &I : *BBI)
1912 at::remapAssignID(Map, I);
1913 }
1914}
1915#undef DEBUG_TYPE
1916#define DEBUG_TYPE "inline-function"
1917
1918/// Update the block frequencies of the caller after a callee has been inlined.
1919///
1920/// Each block cloned into the caller has its block frequency scaled by the
1921/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
1922/// callee's entry block gets the same frequency as the callsite block and the
1923/// relative frequencies of all cloned blocks remain the same after cloning.
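///
/// Worked example with made-up numbers: if the callee entry block has
/// frequency 200 and the call site block has frequency 50, each cloned block's
/// callee frequency is scaled by 50/200, so a callee block with frequency 400
/// becomes a cloned block with frequency 100 in the caller.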
1924static void updateCallerBFI(BasicBlock *CallSiteBlock,
1925 const ValueToValueMapTy &VMap,
1926 BlockFrequencyInfo *CallerBFI,
1927 BlockFrequencyInfo *CalleeBFI,
1928 const BasicBlock &CalleeEntryBlock) {
1929 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
1930 for (auto Entry : VMap) {
1931 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
1932 continue;
1933 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
1934 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
1935 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
1936 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
1937 // Multiple blocks in the callee might get mapped to one cloned block in
1938 // the caller since we prune the callee as we clone it. When that happens,
1939 // we want to use the maximum among the original blocks' frequencies.
1940 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
1941 if (NewFreq > Freq)
1942 Freq = NewFreq;
1943 }
1944 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
1945 }
1946 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
1947 CallerBFI->setBlockFreqAndScale(
1948 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
1949}
1950
1951/// Update the branch metadata for cloned call instructions.
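///
/// Worked example with made-up counts: if the callee's entry count is 1000 and
/// the profiled count of this call site is 300, then 300 is subtracted from
/// the callee's entry count (leaving 700), the call sites cloned into the
/// caller are scaled by 300/1000, and the call sites remaining in the original
/// callee are scaled by 700/1000.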
1952static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
1953 const ProfileCount &CalleeEntryCount,
1954 const CallBase &TheCall, ProfileSummaryInfo *PSI,
1955 BlockFrequencyInfo *CallerBFI) {
1956 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
1957 return;
1958 auto CallSiteCount =
1959 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
1960 int64_t CallCount =
1961 std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount());
1962 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
1963}
1964
1965void llvm::updateProfileCallee(
1966 Function *Callee, int64_t EntryDelta,
1967 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
1968 auto CalleeCount = Callee->getEntryCount();
1969 if (!CalleeCount)
1970 return;
1971
1972 const uint64_t PriorEntryCount = CalleeCount->getCount();
1973
1974 // Since CallSiteCount is an estimate, it could exceed the original callee
1975 // count; clamp the new entry count to 0 to guard against underflow.
1976 const uint64_t NewEntryCount =
1977 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
1978 ? 0
1979 : PriorEntryCount + EntryDelta;
1980
1981 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
1982 const uint64_t PriorEntryCount) {
1983 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
1984 if (VPtr)
1985 scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount);
1986 };
1987
1988 // Are we updating the profile during inlining?
1989 if (VMap) {
1990 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
1991 for (auto Entry : *VMap) {
1992 if (isa<CallInst>(Val: Entry.first))
1993 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) {
1994 CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
1995 updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
1996 }
1997
1998 if (isa<InvokeInst>(Val: Entry.first))
1999 if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) {
2000 II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2001 updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2002 }
2003 }
2004 }
2005
2006 if (EntryDelta) {
2007 Callee->setEntryCount(Count: NewEntryCount);
2008
2009 for (BasicBlock &BB : *Callee)
2010 // No need to update the callsite if it is pruned during inlining.
2011 if (!VMap || VMap->count(Val: &BB))
2012 for (Instruction &I : BB) {
2013 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2014 CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2015 updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2016 }
2017 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) {
2018 II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2019 updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2020 }
2021 }
2022 }
2023}
2024
2025/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2026/// result is implicitly consumed by a call to retainRV or claimRV immediately
2027/// after the call. This function inlines the retainRV/claimRV calls.
2028///
2029/// There are three cases to consider:
2030///
2031/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2032/// object in the callee return block, the autoreleaseRV call and the
2033/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2034/// is a claimRV call, a call to objc_release is emitted.
2035///
2036/// 2. If there is a call in the callee return block that doesn't have operand
2037/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2038/// is transferred to the call in the callee.
2039///
2040/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2041/// a retainRV call.
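///
/// Sketch of case 1 (hypothetical IR): given a callee return block such as
///   %v = ...
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)
///   ret ptr %v
/// and a call site whose "clang.arc.attachedcall" bundle names retainRV, the
/// autoreleaseReturnValue call is simply erased; if the bundle names claimRV
/// instead, a call to @llvm.objc.release on the returned object is emitted
/// before the autoreleaseReturnValue call is erased.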
2042static void
2043inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2044 const SmallVectorImpl<ReturnInst *> &Returns) {
2045 Module *Mod = CB.getModule();
2046 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2047 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2048 IsUnsafeClaimRV = !IsRetainRV;
2049
2050 for (auto *RI : Returns) {
2051 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
2052 bool InsertRetainCall = IsRetainRV;
2053 IRBuilder<> Builder(RI->getContext());
2054
2055 // Walk backwards through the basic block looking for either a matching
2056 // autoreleaseRV call or an unannotated call.
2057 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
2058 y: RI->getParent()->rend());
2059 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
2060 // Ignore casts.
2061 if (isa<CastInst>(Val: I))
2062 continue;
2063
2064 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
2065 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2066 !II->hasNUses(N: 0) ||
2067 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
2068 break;
2069
2070 // If we've found a matching autoreleaseRV call:
2071 // - If claimRV is attached to the call, insert a call to objc_release
2072 // and erase the autoreleaseRV call.
2073 // - If retainRV is attached to the call, just erase the autoreleaseRV
2074 // call.
2075 if (IsUnsafeClaimRV) {
2076 Builder.SetInsertPoint(II);
2077 Function *IFn =
2078 Intrinsic::getDeclaration(M: Mod, id: Intrinsic::objc_release);
2079 Builder.CreateCall(Callee: IFn, Args: RetOpnd, Name: "");
2080 }
2081 II->eraseFromParent();
2082 InsertRetainCall = false;
2083 break;
2084 }
2085
2086 auto *CI = dyn_cast<CallInst>(Val: &I);
2087
2088 if (!CI)
2089 break;
2090
2091 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
2092 objcarc::hasAttachedCallOpBundle(CB: CI))
2093 break;
2094
2095 // If we've found an unannotated call that defines RetOpnd, add a
2096 // "clang.arc.attachedcall" operand bundle.
2097 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2098 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2099 auto *NewCall = CallBase::addOperandBundle(
2100 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2101 NewCall->copyMetadata(SrcInst: *CI);
2102 CI->replaceAllUsesWith(V: NewCall);
2103 CI->eraseFromParent();
2104 InsertRetainCall = false;
2105 break;
2106 }
2107
2108 if (InsertRetainCall) {
2109 // The retainRV is attached to the call and we've failed to find a
2110 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2111 // to objc_retain.
2112 Builder.SetInsertPoint(RI);
2113 Function *IFn = Intrinsic::getDeclaration(M: Mod, id: Intrinsic::objc_retain);
2114 Builder.CreateCall(Callee: IFn, Args: RetOpnd, Name: "");
2115 }
2116 }
2117}
2118
2119/// This function inlines the called function into the basic block of the
2120/// caller. It returns a failure InlineResult if it is not possible to inline
2121/// this call. The program is still in a well-defined state if this occurs.
2122///
2123/// Note that this only does one level of inlining. For example, if the
2124/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2125/// exists in the instruction stream. Similarly this will inline a recursive
2126/// function by one level.
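///
/// Minimal usage sketch from a hypothetical caller of this API (the values are
/// illustrative and the trailing parameters keep their defaults):
///   InlineFunctionInfo IFI;
///   InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true);
///   if (!Res.isSuccess())
///     LLVM_DEBUG(dbgs() << "inlining failed: " << Res.getFailureReason());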
2127llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2128 bool MergeAttributes,
2129 AAResults *CalleeAAR,
2130 bool InsertLifetime,
2131 Function *ForwardVarArgsTo) {
2132 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2133
2134 // FIXME: we don't inline callbr yet.
2135 if (isa<CallBrInst>(Val: CB))
2136 return InlineResult::failure(Reason: "We don't inline callbr yet.");
2137
2138 // If IFI has any state in it, zap it before we fill it in.
2139 IFI.reset();
2140
2141 Function *CalledFunc = CB.getCalledFunction();
2142 if (!CalledFunc || // Can't inline external function or indirect
2143 CalledFunc->isDeclaration()) // call!
2144 return InlineResult::failure(Reason: "external or indirect");
2145
2146 // The inliner does not know how to inline through calls with operand bundles
2147 // in general ...
2148 Value *ConvergenceControlToken = nullptr;
2149 if (CB.hasOperandBundles()) {
2150 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2151 auto OBUse = CB.getOperandBundleAt(Index: i);
2152 uint32_t Tag = OBUse.getTagID();
2153 // ... but it knows how to inline through "deopt" operand bundles ...
2154 if (Tag == LLVMContext::OB_deopt)
2155 continue;
2156 // ... and "funclet" operand bundles.
2157 if (Tag == LLVMContext::OB_funclet)
2158 continue;
2159 if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2160 continue;
2161 if (Tag == LLVMContext::OB_kcfi)
2162 continue;
2163 if (Tag == LLVMContext::OB_convergencectrl) {
2164 ConvergenceControlToken = OBUse.Inputs[0].get();
2165 continue;
2166 }
2167
2168 return InlineResult::failure(Reason: "unsupported operand bundle");
2169 }
2170 }
2171
2172 // FIXME: The check below is redundant and incomplete. According to spec, if a
2173 // convergent call is missing a token, then the caller is using uncontrolled
2174 // convergence. If the callee has an entry intrinsic, then the callee is using
2175 // controlled convergence, and the call cannot be inlined. A proper
2176 // implementation of this check requires a whole new analysis that identifies
2177 // convergence in every function. For now, we skip that and just do this one
2178 // cursory check. The underlying assumption is that in a compiler flow that
2179 // fully implements convergence control tokens, there is no mixing of
2180 // controlled and uncontrolled convergent operations in the whole program.
2181 if (CB.isConvergent()) {
2182 auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();
2183 if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(Val: I)) {
2184 if (IntrinsicCall->getIntrinsicID() ==
2185 Intrinsic::experimental_convergence_entry) {
2186 if (!ConvergenceControlToken) {
2187 return InlineResult::failure(
2188 Reason: "convergent call needs convergencectrl operand");
2189 }
2190 }
2191 }
2192 }
2193
2194 // If the call to the callee cannot throw, set the 'nounwind' flag on any
2195 // calls that we inline.
2196 bool MarkNoUnwind = CB.doesNotThrow();
2197
2198 BasicBlock *OrigBB = CB.getParent();
2199 Function *Caller = OrigBB->getParent();
2200
2201 // GC poses two hazards to inlining, which only occur when the callee has GC:
2202 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2203 // caller.
2204 // 2. If the caller has a differing GC, it is invalid to inline.
2205 if (CalledFunc->hasGC()) {
2206 if (!Caller->hasGC())
2207 Caller->setGC(CalledFunc->getGC());
2208 else if (CalledFunc->getGC() != Caller->getGC())
2209 return InlineResult::failure(Reason: "incompatible GC");
2210 }
2211
2212 // Get the personality function from the callee if it contains a landing pad.
2213 Constant *CalledPersonality =
2214 CalledFunc->hasPersonalityFn()
2215 ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2216 : nullptr;
2217
2218 // Find the personality function used by the landing pads of the caller. If it
2219 // exists, then check to see that it matches the personality function used in
2220 // the callee.
2221 Constant *CallerPersonality =
2222 Caller->hasPersonalityFn()
2223 ? Caller->getPersonalityFn()->stripPointerCasts()
2224 : nullptr;
2225 if (CalledPersonality) {
2226 if (!CallerPersonality)
2227 Caller->setPersonalityFn(CalledPersonality);
2228 // If the personality functions match, then we can perform the
2229 // inlining. Otherwise, we can't inline.
2230 // TODO: This isn't 100% true. Some personality functions are proper
2231 // supersets of others and can be used in place of the other.
2232 else if (CalledPersonality != CallerPersonality)
2233 return InlineResult::failure(Reason: "incompatible personality");
2234 }
2235
2236 // We need to figure out which funclet the callsite was in so that we may
2237 // properly nest the callee.
2238 Instruction *CallSiteEHPad = nullptr;
2239 if (CallerPersonality) {
2240 EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
2241 if (isScopedEHPersonality(Pers: Personality)) {
2242 std::optional<OperandBundleUse> ParentFunclet =
2243 CB.getOperandBundle(ID: LLVMContext::OB_funclet);
2244 if (ParentFunclet)
2245 CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());
2246
2247 // OK, the inlining site is legal. What about the target function?
2248
2249 if (CallSiteEHPad) {
2250 if (Personality == EHPersonality::MSVC_CXX) {
2251 // The MSVC personality cannot tolerate catches getting inlined into
2252 // cleanup funclets.
2253 if (isa<CleanupPadInst>(Val: CallSiteEHPad)) {
2254 // Ok, the call site is within a cleanuppad. Let's check the callee
2255 // for catchpads.
2256 for (const BasicBlock &CalledBB : *CalledFunc) {
2257 if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHI()))
2258 return InlineResult::failure(Reason: "catch in cleanup funclet");
2259 }
2260 }
2261 } else if (isAsynchronousEHPersonality(Pers: Personality)) {
2262 // SEH is even less tolerant; there may not be any sort of exceptional
2263 // funclet in the callee.
2264 for (const BasicBlock &CalledBB : *CalledFunc) {
2265 if (CalledBB.isEHPad())
2266 return InlineResult::failure(Reason: "SEH in cleanup funclet");
2267 }
2268 }
2269 }
2270 }
2271 }
2272
2273 // Determine if we are dealing with a call in an EHPad which does not unwind
2274 // to caller.
2275 bool EHPadForCallUnwindsLocally = false;
2276 if (CallSiteEHPad && isa<CallInst>(Val: CB)) {
2277 UnwindDestMemoTy FuncletUnwindMap;
2278 Value *CallSiteUnwindDestToken =
2279 getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2280
2281 EHPadForCallUnwindsLocally =
2282 CallSiteUnwindDestToken &&
2283 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2284 }
2285
2286 // Get an iterator to the last basic block in the function, which will have
2287 // the new function inlined after it.
2288 Function::iterator LastBlock = --Caller->end();
2289
2290 // Make sure to capture all of the return instructions from the cloned
2291 // function.
2292 SmallVector<ReturnInst*, 8> Returns;
2293 ClonedCodeInfo InlinedFunctionInfo;
2294 Function::iterator FirstNewBlock;
2295
2296 { // Scope to destroy VMap after cloning.
2297 ValueToValueMapTy VMap;
2298 struct ByValInit {
2299 Value *Dst;
2300 Value *Src;
2301 Type *Ty;
2302 };
2303 // Keep a list of (dst, src, type) entries to emit byval initializations.
2304 SmallVector<ByValInit, 4> ByValInits;
2305
2306 // When inlining a function that contains noalias scope metadata,
2307 // this metadata needs to be cloned so that the inlined blocks
2308 // have different "unique scopes" at every call site.
2309 // Track the metadata that must be cloned. Do this before other changes to
2310 // the function, so that we do not get in trouble when inlining caller ==
2311 // callee.
2312 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2313
2314 auto &DL = Caller->getDataLayout();
2315
2316 // Calculate the vector of arguments to pass into the function cloner, which
2317 // matches up the formal to the actual argument values.
2318 auto AI = CB.arg_begin();
2319 unsigned ArgNo = 0;
2320 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2321 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2322 Value *ActualArg = *AI;
2323
2324 // When byval arguments are actually inlined, we need to make the copy implied
2325 // by them explicit. However, we don't do this if the callee is readonly
2326 // or readnone, because the copy would be unneeded: the callee doesn't
2327 // modify the struct.
2328 if (CB.isByValArgument(ArgNo)) {
2329 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2330 TheCall: &CB, CalledFunc, IFI,
2331 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2332 if (ActualArg != *AI)
2333 ByValInits.push_back(
2334 Elt: {.Dst: ActualArg, .Src: (Value *)*AI, .Ty: CB.getParamByValType(ArgNo)});
2335 }
2336
2337 VMap[&*I] = ActualArg;
2338 }
2339
2340 // TODO: Remove this when users have been updated to the assume bundles.
2341 // Add alignment assumptions if necessary. We do this before the inlined
2342 // instructions are actually cloned into the caller so that we can easily
2343 // check what will be known at the start of the inlined code.
2344 AddAlignmentAssumptions(CB, IFI);
2345
2346 AssumptionCache *AC =
2347 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2348
2349 /// Preserve all attributes of the call and its parameters.
2350 salvageKnowledge(I: &CB, AC);
2351
2352 // We want the inliner to prune the code as it copies. We would LOVE to
2353 // have no dead or constant instructions leftover after inlining occurs
2354 // (which can happen, e.g., because an argument was constant), but we'll be
2355 // happy with whatever the cloner can do.
2356 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2357 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2358 CodeInfo: &InlinedFunctionInfo);
2359 // Remember the first block that is newly cloned over.
2360 FirstNewBlock = LastBlock; ++FirstNewBlock;
2361
2362 // Insert retainRV/claimRV runtime calls.
2363 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2364 if (RVCallKind != objcarc::ARCInstKind::None)
2365 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2366
2367 // Update caller/callee profiles only when requested. For sample loader
2368 // inlining, the context-sensitive inlinee profile doesn't need to be
2369 // subtracted from callee profile, and the inlined clone also doesn't need
2370 // to be scaled based on call site count.
2371 if (IFI.UpdateProfile) {
2372 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2373 // Update the BFI of blocks cloned into the caller.
2374 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2375 CalleeEntryBlock: CalledFunc->front());
2376
2377 if (auto Profile = CalledFunc->getEntryCount())
2378 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2379 CallerBFI: IFI.CallerBFI);
2380 }
2381
2382 // Inject byval arguments initialization.
2383 for (ByValInit &Init : ByValInits)
2384 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, M: Caller->getParent(),
2385 InsertBlock: &*FirstNewBlock, IFI, CalledFunc);
2386
2387 std::optional<OperandBundleUse> ParentDeopt =
2388 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2389 if (ParentDeopt) {
2390 SmallVector<OperandBundleDef, 2> OpDefs;
2391
2392 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2393 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2394 if (!ICS)
2395 continue; // instruction was DCE'd or RAUW'ed to undef
2396
2397 OpDefs.clear();
2398
2399 OpDefs.reserve(N: ICS->getNumOperandBundles());
2400
2401 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2402 ++COBi) {
2403 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2404 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2405 // If the inlined call has other operand bundles, let them be
2406 OpDefs.emplace_back(Args&: ChildOB);
2407 continue;
2408 }
2409
2410 // It may be useful to separate this logic (of handling operand
2411 // bundles) out to a separate "policy" component if this gets crowded.
2412 // Prepend the parent's deoptimization continuation to the newly
2413 // inlined call's deoptimization continuation.
2414 std::vector<Value *> MergedDeoptArgs;
2415 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2416 ChildOB.Inputs.size());
2417
2418 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2419 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2420
2421 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2422 }
2423
2424 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2425
2426 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2427 // this even if the call returns void.
2428 ICS->replaceAllUsesWith(V: NewI);
2429
2430 VH = nullptr;
2431 ICS->eraseFromParent();
2432 }
2433 }
2434
2435 // For 'nodebug' functions, the associated DISubprogram is always null.
2436 // Conservatively avoid propagating the callsite debug location to
2437 // instructions inlined from a function whose DISubprogram is not null.
2438 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2439 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2440
2441 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2442 // Interpret inlined stores to caller-local variables as assignments.
2443 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2444
2445 // Update DIAssignID metadata attachments and uses so that they are
2446 // unique to this inlined instance.
2447 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2448 }
2449
2450 // Now clone the inlined noalias scope metadata.
2451 SAMetadataCloner.clone();
2452 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2453
2454 // Add noalias metadata if necessary.
2455 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2456
2457 // Clone return attributes on the callsite into the calls within the inlined
2458 // function which feed into its return value.
2459 AddReturnAttributes(CB, VMap);
2460
2461 // Clone attributes on the params of the callsite to calls within the
2462 // inlined function which use the same param.
2463 AddParamAndFnBasicAttributes(CB, VMap);
2464
2465 propagateMemProfMetadata(Callee: CalledFunc, CB,
2466 ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
2467
2468 // Propagate metadata on the callsite if necessary.
2469 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2470
2471 // Register any cloned assumptions.
2472 if (IFI.GetAssumptionCache)
2473 for (BasicBlock &NewBlock :
2474 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2475 for (Instruction &I : NewBlock)
2476 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2477 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2478 }
2479
2480 if (ConvergenceControlToken) {
2481 auto *I = FirstNewBlock->getFirstNonPHI();
2482 if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(Val: I)) {
2483 if (IntrinsicCall->getIntrinsicID() ==
2484 Intrinsic::experimental_convergence_entry) {
2485 IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken);
2486 IntrinsicCall->eraseFromParent();
2487 }
2488 }
2489 }
2490
2491 // If there are any alloca instructions in the block that used to be the entry
2492 // block for the callee, move them to the entry block of the caller. First
2493 // calculate which instruction they should be inserted before. We insert the
2494 // instructions at the end of the current alloca list.
2495 {
2496 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2497 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2498 E = FirstNewBlock->end(); I != E; ) {
2499 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
2500 if (!AI) continue;
2501
2502 // If the alloca is now dead, remove it. This often occurs due to code
2503 // specialization.
2504 if (AI->use_empty()) {
2505 AI->eraseFromParent();
2506 continue;
2507 }
2508
2509 if (!allocaWouldBeStaticInEntry(AI))
2510 continue;
2511
2512 // Keep track of the static allocas that we inline into the caller.
2513 IFI.StaticAllocas.push_back(Elt: AI);
2514
2515 // Scan for the block of allocas that we can move over, and move them
2516 // all at once.
2517 while (isa<AllocaInst>(Val: I) &&
2518 !cast<AllocaInst>(Val&: I)->use_empty() &&
2519 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
2520 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
2521 ++I;
2522 }
2523
2524 // Transfer all of the allocas over in a block. Using splice means
2525 // that the instructions aren't removed from the symbol table, then
2526 // reinserted.
2527 I.setTailBit(true);
2528 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
2529 FromBeginIt: AI->getIterator(), FromEndIt: I);
2530 }
2531 }
2532
2533 SmallVector<Value*,4> VarArgsToForward;
2534 SmallVector<AttributeSet, 4> VarArgsAttrs;
2535 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2536 i < CB.arg_size(); i++) {
2537 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
2538 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
2539 }
2540
2541 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2542 if (InlinedFunctionInfo.ContainsCalls) {
2543 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2544 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
2545 CallSiteTailKind = CI->getTailCallKind();
2546
2547 // For inlining purposes, the "notail" marker is the same as no marker.
2548 if (CallSiteTailKind == CallInst::TCK_NoTail)
2549 CallSiteTailKind = CallInst::TCK_None;
2550
2551 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2552 ++BB) {
2553 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
2554 CallInst *CI = dyn_cast<CallInst>(Val: &I);
2555 if (!CI)
2556 continue;
2557
2558 // Forward varargs from inlined call site to calls to the
2559 // ForwardVarArgsTo function, if requested, and to musttail calls.
2560 if (!VarArgsToForward.empty() &&
2561 ((ForwardVarArgsTo &&
2562 CI->getCalledFunction() == ForwardVarArgsTo) ||
2563 CI->isMustTailCall())) {
2564 // Collect attributes for non-vararg parameters.
2565 AttributeList Attrs = CI->getAttributes();
2566 SmallVector<AttributeSet, 8> ArgAttrs;
2567 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2568 for (unsigned ArgNo = 0;
2569 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2570 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
2571 }
2572
2573 // Add VarArg attributes.
2574 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
2575 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
2576 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
2577 // Add VarArgs to existing parameters.
2578 SmallVector<Value *, 6> Params(CI->args());
2579 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
2580 CallInst *NewCI = CallInst::Create(
2581 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
2582 NewCI->setDebugLoc(CI->getDebugLoc());
2583 NewCI->setAttributes(Attrs);
2584 NewCI->setCallingConv(CI->getCallingConv());
2585 CI->replaceAllUsesWith(V: NewCI);
2586 CI->eraseFromParent();
2587 CI = NewCI;
2588 }
2589
2590 if (Function *F = CI->getCalledFunction())
2591 InlinedDeoptimizeCalls |=
2592 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2593
2594 // We need to reduce the strength of any inlined tail calls. For
2595 // musttail, we have to avoid introducing potential unbounded stack
2596 // growth. For example, if functions 'f' and 'g' are mutually recursive
2597 // with musttail, we can inline 'g' into 'f' so long as we preserve
2598 // musttail on the cloned call to 'f'. If either the inlined call site
2599 // or the cloned call site is *not* musttail, the program already has
2600 // one frame of stack growth, so it's safe to remove musttail. Here is
2601 // a table of example transformations:
2602 //
2603 // f -> musttail g -> musttail f ==> f -> musttail f
2604 // f -> musttail g -> tail f ==> f -> tail f
2605 // f -> g -> musttail f ==> f -> f
2606 // f -> g -> tail f ==> f -> f
2607 //
2608 // Inlined notail calls should remain notail calls.
2609 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2610 if (ChildTCK != CallInst::TCK_NoTail)
2611 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
2612 CI->setTailCallKind(ChildTCK);
2613 InlinedMustTailCalls |= CI->isMustTailCall();
2614
2615 // Call sites inlined through a 'nounwind' call site should be
2616 // 'nounwind' as well. However, avoid marking call sites explicitly
2617 // where possible. This helps expose more opportunities for CSE after
2618 // inlining, commonly when the callee is an intrinsic.
2619 if (MarkNoUnwind && !CI->doesNotThrow())
2620 CI->setDoesNotThrow();
2621 }
2622 }
2623 }
2624
2625 // Leave lifetime markers for the static allocas, scoping them to the
2626 // function we just inlined.
2627 // We need to insert lifetime intrinsics even at O0 to avoid invalid
2628 // access caused by multithreaded coroutines. The check
2629 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
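 // As a sketch (the name %buf and the 64-byte size are made up), a static
 // alloca inlined from the callee ends up bracketed roughly as:
 //   call void @llvm.lifetime.start.p0(i64 64, ptr %buf)
 //   ... inlined body ...
 //   call void @llvm.lifetime.end.p0(i64 64, ptr %buf)   ; before each return
 // with the size replaced by -1 when it cannot be computed statically.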
2630 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
2631 !IFI.StaticAllocas.empty()) {
2632 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
2633 for (AllocaInst *AI : IFI.StaticAllocas) {
2634 // Don't mark swifterror allocas. They can't have bitcast uses.
2635 if (AI->isSwiftError())
2636 continue;
2637
2638 // If the alloca is already scoped to something smaller than the whole
2639 // function then there's no need to add redundant, less accurate markers.
2640 if (hasLifetimeMarkers(AI))
2641 continue;
2642
2643 // Try to determine the size of the allocation.
2644 ConstantInt *AllocaSize = nullptr;
2645 if (ConstantInt *AIArraySize =
2646 dyn_cast<ConstantInt>(Val: AI->getArraySize())) {
2647 auto &DL = Caller->getDataLayout();
2648 Type *AllocaType = AI->getAllocatedType();
2649 TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType);
2650 uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
2651
2652 // Don't add markers for zero-sized allocas.
2653 if (AllocaArraySize == 0)
2654 continue;
2655
2656 // Check that array size doesn't saturate uint64_t and doesn't
2657 // overflow when it's multiplied by type size.
2658 if (!AllocaTypeSize.isScalable() &&
2659 AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
2660 std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
2661 AllocaTypeSize.getFixedValue()) {
2662 AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()),
2663 V: AllocaArraySize * AllocaTypeSize);
2664 }
2665 }
2666
2667 builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize);
2668 for (ReturnInst *RI : Returns) {
2669 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
2670 // call and a return. The return kills all local allocas.
2671 if (InlinedMustTailCalls &&
2672 RI->getParent()->getTerminatingMustTailCall())
2673 continue;
2674 if (InlinedDeoptimizeCalls &&
2675 RI->getParent()->getTerminatingDeoptimizeCall())
2676 continue;
2677 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize);
2678 }
2679 }
2680 }
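
  // Illustrative sketch of the shape produced above (hypothetical IR, shown
  // only as documentation): a 16-byte static alloca %buf from the inlinee ends
  // up bracketed in the caller roughly as
  //   call void @llvm.lifetime.start.p0(i64 16, ptr %buf)
  //   ...inlined code using %buf...
  //   call void @llvm.lifetime.end.p0(i64 16, ptr %buf)
  // before each surviving return, with a conservative "unknown" size when the
  // allocation size cannot be computed (e.g. scalable vector allocas).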

  // If the inlined code contained dynamic alloca instructions, wrap the
  // inlined code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
                             .CreateStackSave("savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (ReturnInst *RI : Returns) {
      // Don't insert llvm.stackrestore calls between a musttail or deoptimize
      // call and a return. The return will restore the stack pointer.
      if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
        continue;
      if (InlinedDeoptimizeCalls &&
          RI->getParent()->getTerminatingDeoptimizeCall())
        continue;
      IRBuilder<>(RI).CreateStackRestore(SavedPtr);
    }
  }
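
  // Illustrative sketch (hypothetical IR, exact intrinsic mangling depends on
  // the LLVM version): the inlined region containing dynamic allocas ends up
  // bracketed roughly as
  //   %savedstack = call ptr @llvm.stacksave()
  //   ...inlined code with dynamic allocas...
  //   call void @llvm.stackrestore(ptr %savedstack)
  // so stack space allocated by the inlinee is released on every return path.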

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions. This is sensitive to which
  // funclet pads were top-level in the inlinee, so must be done before
  // rewriting the "parent pad" links.
  if (auto *II = dyn_cast<InvokeInst>(&CB)) {
    BasicBlock *UnwindDest = II->getUnwindDest();
    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
    if (isa<LandingPadInst>(FirstNonPHI)) {
      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    } else {
      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    }
  }
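
  // Descriptive note: a landingpad at the unwind destination corresponds to
  // Itanium-style EH personalities, while the else branch handles the
  // funclet-based (catchswitch/cleanuppad) personalities used by Windows EH.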

  // Update the lexical scopes of the new funclets and callsites.
  // Anything that had 'none' as its parent is now nested inside the callsite's
  // EHPad.
  if (CallSiteEHPad) {
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB) {
      // Add bundle operands to inlined call sites.
      PropagateOperandBundles(BB, CallSiteEHPad);

      // It is problematic if the inlinee has a cleanupret which unwinds to
      // caller and we inline it into a call site which doesn't unwind but into
      // an EH pad that does. Such an edge must be dynamically unreachable.
      // As such, we replace the cleanupret with unreachable.
      if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
        if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
          changeToUnreachable(CleanupRet);

      Instruction *I = BB->getFirstNonPHI();
      if (!I->isEHPad())
        continue;

      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
        if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
          CatchSwitch->setParentPad(CallSiteEHPad);
      } else {
        auto *FPI = cast<FuncletPadInst>(I);
        if (isa<ConstantTokenNone>(FPI->getParentPad()))
          FPI->setParentPad(CallSiteEHPad);
      }
    }
  }
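
  // Descriptive example of the re-parenting above (illustrative only, names
  // are hypothetical): a cleanuppad in the inlinee created 'within none' now
  // becomes 'within %callsite.pad', where %callsite.pad is the funclet pad
  // enclosing the inlined call site.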

  if (InlinedDeoptimizeCalls) {
    // We need to at least remove the deoptimizing returns from the Return set,
    // so that the control flow from those returns does not get merged into the
    // caller (but terminate it instead). If the caller's return type does not
    // match the callee's return type, we also need to change the return type
    // of the intrinsic.
    if (Caller->getReturnType() == CB.getType()) {
      llvm::erase_if(Returns, [](ReturnInst *RI) {
        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
      });
    } else {
      SmallVector<ReturnInst *, 8> NormalReturns;
      Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
          Caller->getParent(), Intrinsic::experimental_deoptimize,
          {Caller->getReturnType()});

      for (ReturnInst *RI : Returns) {
        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
        if (!DeoptCall) {
          NormalReturns.push_back(RI);
          continue;
        }

        // The calling convention on the deoptimize call itself may be bogus,
        // since the code we're inlining may have undefined behavior (and may
        // never actually execute at runtime); but all
        // @llvm.experimental.deoptimize declarations have to have the same
        // calling convention in a well-formed module.
        auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
        NewDeoptIntrinsic->setCallingConv(CallingConv);
        auto *CurBB = RI->getParent();
        RI->eraseFromParent();

        SmallVector<Value *, 4> CallArgs(DeoptCall->args());

        SmallVector<OperandBundleDef, 1> OpBundles;
        DeoptCall->getOperandBundlesAsDefs(OpBundles);
        auto DeoptAttributes = DeoptCall->getAttributes();
        DeoptCall->eraseFromParent();
        assert(!OpBundles.empty() &&
               "Expected at least the deopt operand bundle");

        IRBuilder<> Builder(CurBB);
        CallInst *NewDeoptCall =
            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
        NewDeoptCall->setCallingConv(CallingConv);
        NewDeoptCall->setAttributes(DeoptAttributes);
        if (NewDeoptCall->getType()->isVoidTy())
          Builder.CreateRetVoid();
        else
          Builder.CreateRet(NewDeoptCall);
        // Since the ret type is changed, remove the incompatible attributes.
        NewDeoptCall->removeRetAttrs(
            AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));
      }

      // Leave behind the normal returns so we can merge control flow.
      std::swap(Returns, NormalReturns);
    }
  }
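
  // Illustrative sketch of the retyping above (hypothetical IR, the intrinsic
  // suffixes shown are only examples of the return-type mangling): a block
  // that ended in
  //   call void @llvm.experimental.deoptimize.isVoid(...) [ "deopt"(...) ]
  //   ret void
  // inside a callee inlined into an i32-returning caller is rewritten to
  //   %dv = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
  //   ret i32 %dv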

  // Handle any inlined musttail call sites. In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail. Therefore it's safe to return without merging control into the
  // phi below.
  if (InlinedMustTailCalls) {
    // Check if we need to bitcast the result of any musttail calls.
    Type *NewRetTy = Caller->getReturnType();
    bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;

    // Handle the returns preceded by musttail calls separately.
    SmallVector<ReturnInst *, 8> NormalReturns;
    for (ReturnInst *RI : Returns) {
      CallInst *ReturnedMustTail =
          RI->getParent()->getTerminatingMustTailCall();
      if (!ReturnedMustTail) {
        NormalReturns.push_back(RI);
        continue;
      }
      if (!NeedBitCast)
        continue;

      // Delete the old return and any preceding bitcast.
      BasicBlock *CurBB = RI->getParent();
      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
      RI->eraseFromParent();
      if (OldCast)
        OldCast->eraseFromParent();

      // Insert a new bitcast and return with the right type.
      IRBuilder<> Builder(CurBB);
      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
    }

    // Leave behind the normal returns so we can merge control flow.
    std::swap(Returns, NormalReturns);
  }
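
  // Descriptive note (editorial): after the two filters above, Returns holds
  // only "plain" returns; returns reached through a musttail call or a
  // deoptimize call keep their own terminators and deliberately bypass the
  // control-flow merge performed below.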

  // Now that all of the transforms on the inlined code have taken place but
  // before we splice the inlined code into the CFG and lose track of which
  // blocks were actually inlined, collect the call sites. We only do this if
  // call graph updates weren't requested, as those provide value handle based
  // tracking of inlined call sites instead. Calls to intrinsics are not
  // collected because they are not inlineable.
  if (InlinedFunctionInfo.ContainsCalls) {
    // Otherwise just collect the raw call sites that were inlined.
    for (BasicBlock &NewBB :
         make_range(FirstNewBlock->getIterator(), Caller->end()))
      for (Instruction &I : NewBB)
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (!(CB->getCalledFunction() &&
                CB->getCalledFunction()->isIntrinsic()))
            IFI.InlinedCallSites.push_back(CB);
  }

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
                   FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->back().eraseFromParent();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst *NewBr =
          BranchInst::Create(II->getNormalDest(), CB.getIterator());
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!CB.use_empty()) {
      ReturnInst *R = Returns[0];
      if (&CB == R->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    CB.eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    if (MergeAttributes)
      AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

    // We are now done with the inlining.
    return InlineResult::success();
  }
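
  // Illustrative before/after for the single-block fast path above
  // (hypothetical IR, names invented for the example):
  //   before:  %r = call i32 @callee(i32 %x)   ; @callee body ends in ret i32 %sum
  //   after:   the callee's instructions are spliced in place of the call, and
  //            every former use of %r now uses the cloned %sum directly.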

  // Otherwise, we have the normal case of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks. How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = nullptr;
  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest =
        BranchInst::Create(II->getNormalDest(), CB.getIterator());

    // Split the basic block. This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB =
        OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
                                CalledFunc->getName() + ".exit");

  } else { // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
                                          CalledFunc->getName() + ".exit");
  }

  if (IFI.CallerBFI) {
    // Copy original BB's block frequency to AfterCallBB.
    IFI.CallerBFI->setBlockFreq(AfterCallBB,
                                IFI.CallerBFI->getBlockFreq(OrigBB));
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  Instruction *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, &*FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer. In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
                 Caller->end());

  // Handle all of the return instructions that we just cloned in, and
  // eliminate any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!CB.use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
      PHI->insertBefore(AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      CB.replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (ReturnInst *RI : Returns) {
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge point and remove return instructions.
    DebugLoc Loc;
    for (ReturnInst *RI : Returns) {
      BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!CB.use_empty()) {
      if (&CB == Returns[0]->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!CB.use_empty()) {
    // No returns, but something is using the return value of the call. Just
    // nuke the result.
    CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
  }
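
  // Illustrative sketch of the multi-return merge above (hypothetical IR,
  // names invented for the example): two inlined blocks ending in
  // 'ret i32 %a' and 'ret i32 %b' now branch to the '<callee>.exit' block,
  // which begins with
  //   %phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
  // and all former uses of the call result use %phi instead.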

  // Since we are now done with the Call/Invoke, we can delete it.
  CB.eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it. It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_empty(AfterCallBB))
    AfterCallBB->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
  OrigBB->splice(Br->getIterator(), CalleeEntry);

  // Remove the unconditional branch.
  Br->eraseFromParent();

  // Now we can remove the CalleeEntry block, which is now empty.
  CalleeEntry->eraseFromParent();

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef). If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
    auto &DL = Caller->getDataLayout();
    if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  if (MergeAttributes)
    AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

  return InlineResult::success();
}