1//===- InlineFunction.cpp - Code to perform function inlining -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inlining of a function into a call site, resolving
10// parameters and the return value as appropriate.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SetVector.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/CallGraph.h"
25#include "llvm/Analysis/CaptureTracking.h"
26#include "llvm/Analysis/CtxProfAnalysis.h"
27#include "llvm/Analysis/IndirectCallVisitor.h"
28#include "llvm/Analysis/InstructionSimplify.h"
29#include "llvm/Analysis/MemoryProfileInfo.h"
30#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/AttributeMask.h"
37#include "llvm/IR/Attributes.h"
38#include "llvm/IR/BasicBlock.h"
39#include "llvm/IR/CFG.h"
40#include "llvm/IR/Constant.h"
41#include "llvm/IR/ConstantRange.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/DebugInfoMetadata.h"
46#include "llvm/IR/DebugLoc.h"
47#include "llvm/IR/DerivedTypes.h"
48#include "llvm/IR/Dominators.h"
49#include "llvm/IR/EHPersonalities.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalVariable.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InlineAsm.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/IntrinsicInst.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/LLVMContext.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/Metadata.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/PatternMatch.h"
64#include "llvm/IR/ProfDataUtils.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/User.h"
67#include "llvm/IR/Value.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72#include "llvm/Transforms/Utils/Cloning.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/ValueMapper.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <deque>
79#include <iterator>
80#include <optional>
81#include <string>
82#include <utility>
83#include <vector>
84
85#define DEBUG_TYPE "inline-function"
86
87using namespace llvm;
88using namespace llvm::memprof;
89
90static cl::opt<bool>
91EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(Val: true),
92 cl::Hidden,
93 cl::desc("Convert noalias attributes to metadata during inlining."));
94
95static cl::opt<bool>
96 UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
97 cl::init(Val: true),
98 cl::desc("Use the llvm.experimental.noalias.scope.decl "
99 "intrinsic during inlining."));
100
101// Disabled by default, because the added alignment assumptions may increase
102// compile-time and block optimizations. This option is not suitable for use
103// with frontends that emit comprehensive parameter alignment annotations.
104static cl::opt<bool>
105PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
106 cl::init(Val: false), cl::Hidden,
107 cl::desc("Convert align attributes to assumptions during inlining."));
108
109static cl::opt<unsigned> InlinerAttributeWindow(
110 "max-inst-checked-for-throw-during-inlining", cl::Hidden,
111 cl::desc("the maximum number of instructions analyzed for may throw during "
112 "attribute inference in inlined body"),
113 cl::init(Val: 4));
114
115namespace {
116
117 /// A class for recording information about inlining a landing pad.
118 class LandingPadInliningInfo {
119 /// Destination of the invoke's unwind.
120 BasicBlock *OuterResumeDest;
121
122 /// Destination for the callee's resume.
123 BasicBlock *InnerResumeDest = nullptr;
124
125 /// LandingPadInst associated with the invoke.
126 LandingPadInst *CallerLPad = nullptr;
127
128 /// PHI for EH values from landingpad insts.
129 PHINode *InnerEHValuesPHI = nullptr;
130
131 SmallVector<Value*, 8> UnwindDestPHIValues;
132
133 public:
134 LandingPadInliningInfo(InvokeInst *II)
135 : OuterResumeDest(II->getUnwindDest()) {
136 // If there are PHI nodes in the unwind destination block, we need to keep
137 // track of which values came into them from the invoke before removing
138 // the edge from this block.
139 BasicBlock *InvokeBB = II->getParent();
140 BasicBlock::iterator I = OuterResumeDest->begin();
141 for (; isa<PHINode>(Val: I); ++I) {
142 // Save the value to use for this edge.
143 PHINode *PHI = cast<PHINode>(Val&: I);
144 UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB));
145 }
146
147 CallerLPad = cast<LandingPadInst>(Val&: I);
148 }
149
150 /// The outer unwind destination is the target of
151 /// unwind edges introduced for calls within the inlined function.
152 BasicBlock *getOuterResumeDest() const {
153 return OuterResumeDest;
154 }
155
156 BasicBlock *getInnerResumeDest();
157
158 LandingPadInst *getLandingPadInst() const { return CallerLPad; }
159
160 /// Forward the 'resume' instruction to the caller's landing pad block.
161 /// When the landing pad block has only one predecessor, this is
162 /// a simple branch. When there is more than one predecessor, we need to
163 /// split the landing pad block after the landingpad instruction and jump
164 /// to there.
165 void forwardResume(ResumeInst *RI,
166 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
167
168 /// Add incoming-PHI values to the unwind destination block for the given
169 /// basic block, using the values for the original invoke's source block.
170 void addIncomingPHIValuesFor(BasicBlock *BB) const {
171 addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest);
172 }
173
174 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
175 BasicBlock::iterator I = dest->begin();
176 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
177 PHINode *phi = cast<PHINode>(Val&: I);
178 phi->addIncoming(V: UnwindDestPHIValues[i], BB: src);
179 }
180 }
181 };
182} // end anonymous namespace
183
184static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
185 BasicBlock::iterator It = BB.getFirstNonPHIIt();
186 while (It != BB.end()) {
187 if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) {
188 if (IntrinsicCall->isEntry()) {
189 return IntrinsicCall;
190 }
191 }
192 It = std::next(x: It);
193 }
194 return nullptr;
195}
196
197/// Get or create a target for the branch from ResumeInsts.
198BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
199 if (InnerResumeDest) return InnerResumeDest;
200
201 // Split the landing pad.
202 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
203 InnerResumeDest =
204 OuterResumeDest->splitBasicBlock(I: SplitPoint,
205 BBName: OuterResumeDest->getName() + ".body");
206
207 // The number of incoming edges we expect to the inner landing pad.
208 const unsigned PHICapacity = 2;
209
210 // Create corresponding new PHIs for all the PHIs in the outer landing pad.
211 BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
212 BasicBlock::iterator I = OuterResumeDest->begin();
213 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
214 PHINode *OuterPHI = cast<PHINode>(Val&: I);
215 PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity,
216 NameStr: OuterPHI->getName() + ".lpad-body");
217 InnerPHI->insertBefore(InsertPos: InsertPoint);
218 OuterPHI->replaceAllUsesWith(V: InnerPHI);
219 InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest);
220 }
221
222 // Create a PHI for the exception values.
223 InnerEHValuesPHI =
224 PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body");
225 InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint);
226 CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI);
227 InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest);
228
229 // All done.
230 return InnerResumeDest;
231}
232
233/// Forward the 'resume' instruction to the caller's landing pad block.
234/// When the landing pad block has only one predecessor, this is a simple
235/// branch. When there is more than one predecessor, we need to split the
236/// landing pad block after the landingpad instruction and jump to there.
237void LandingPadInliningInfo::forwardResume(
238 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
239 BasicBlock *Dest = getInnerResumeDest();
240 BasicBlock *Src = RI->getParent();
241
242 auto *BI = UncondBrInst::Create(Target: Dest, InsertBefore: Src);
243 BI->setDebugLoc(RI->getDebugLoc());
244
245 // Update the PHIs in the destination. They were inserted in an order which
246 // makes this work.
247 addIncomingPHIValuesForInto(src: Src, dest: Dest);
248
249 InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src);
250 RI->eraseFromParent();
251}
252
253/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
254static Value *getParentPad(Value *EHPad) {
255 if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad))
256 return FPI->getParentPad();
257 return cast<CatchSwitchInst>(Val: EHPad)->getParentPad();
258}
259
260using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
261
262/// Helper for getUnwindDestToken that does the descendant-ward part of
263/// the search.
264static Value *getUnwindDestTokenHelper(Instruction *EHPad,
265 UnwindDestMemoTy &MemoMap) {
266 SmallVector<Instruction *, 8> Worklist(1, EHPad);
267
268 while (!Worklist.empty()) {
269 Instruction *CurrentPad = Worklist.pop_back_val();
270 // We only put pads on the worklist that aren't in the MemoMap. When
271 // we find an unwind dest for a pad we may update its ancestors, but
272 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
273 // so they should never get updated while queued on the worklist.
274 assert(!MemoMap.count(CurrentPad));
275 Value *UnwindDestToken = nullptr;
276 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) {
277 if (CatchSwitch->hasUnwindDest()) {
278 UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
279 } else {
280 // Catchswitch doesn't have a 'nounwind' variant, and one might be
281 // annotated as "unwinds to caller" when really it's nounwind (see
282 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
283 // parent's unwind dest from this. We can check its catchpads'
284 // descendants, since they might include a cleanuppad with an
285 // "unwinds to caller" cleanupret, which can be trusted.
286 for (auto HI = CatchSwitch->handler_begin(),
287 HE = CatchSwitch->handler_end();
288 HI != HE && !UnwindDestToken; ++HI) {
289 BasicBlock *HandlerBlock = *HI;
290 auto *CatchPad =
291 cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt());
292 for (User *Child : CatchPad->users()) {
293 // Intentionally ignore invokes here -- since the catchswitch is
294 // marked "unwind to caller", it would be a verifier error if it
295 // contained an invoke which unwinds out of it, so any invoke we'd
296 // encounter must unwind to some child of the catch.
297 if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child))
298 continue;
299
300 Instruction *ChildPad = cast<Instruction>(Val: Child);
301 auto Memo = MemoMap.find(Val: ChildPad);
302 if (Memo == MemoMap.end()) {
303 // Haven't figured out this child pad yet; queue it.
304 Worklist.push_back(Elt: ChildPad);
305 continue;
306 }
307 // We've already checked this child, but might have found that
308 // it offers no proof either way.
309 Value *ChildUnwindDestToken = Memo->second;
310 if (!ChildUnwindDestToken)
311 continue;
312 // We already know the child's unwind dest, which can either
313 // be ConstantTokenNone to indicate unwind to caller, or can
314 // be another child of the catchpad. Only the former indicates
315 // the unwind dest of the catchswitch.
316 if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) {
317 UnwindDestToken = ChildUnwindDestToken;
318 break;
319 }
320 assert(getParentPad(ChildUnwindDestToken) == CatchPad);
321 }
322 }
323 }
324 } else {
325 auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad);
326 for (User *U : CleanupPad->users()) {
327 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) {
328 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
329 UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
330 else
331 UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext());
332 break;
333 }
334 Value *ChildUnwindDestToken;
335 if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) {
336 ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
337 } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) {
338 Instruction *ChildPad = cast<Instruction>(Val: U);
339 auto Memo = MemoMap.find(Val: ChildPad);
340 if (Memo == MemoMap.end()) {
341 // Haven't resolved this child yet; queue it and keep searching.
342 Worklist.push_back(Elt: ChildPad);
343 continue;
344 }
345 // We've checked this child, but still need to ignore it if it
346 // had no proof either way.
347 ChildUnwindDestToken = Memo->second;
348 if (!ChildUnwindDestToken)
349 continue;
350 } else {
351 // Not a relevant user of the cleanuppad
352 continue;
353 }
354 // In a well-formed program, the child/invoke must either unwind to
355 // an(other) child of the cleanup, or exit the cleanup. In the
356 // first case, continue searching.
357 if (isa<Instruction>(Val: ChildUnwindDestToken) &&
358 getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad)
359 continue;
360 UnwindDestToken = ChildUnwindDestToken;
361 break;
362 }
363 }
364 // If we haven't found an unwind dest for CurrentPad, we may have queued its
365 // children, so move on to the next in the worklist.
366 if (!UnwindDestToken)
367 continue;
368
369 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
370 // any ancestors of CurrentPad up to but not including UnwindDestToken's
371 // parent pad. Record this in the memo map, and check to see if the
372 // original EHPad being queried is one of the ones exited.
373 Value *UnwindParent;
374 if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken))
375 UnwindParent = getParentPad(EHPad: UnwindPad);
376 else
377 UnwindParent = nullptr;
378 bool ExitedOriginalPad = false;
379 for (Instruction *ExitedPad = CurrentPad;
380 ExitedPad && ExitedPad != UnwindParent;
381 ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) {
382 // Skip over catchpads since they just follow their catchswitches.
383 if (isa<CatchPadInst>(Val: ExitedPad))
384 continue;
385 MemoMap[ExitedPad] = UnwindDestToken;
386 ExitedOriginalPad |= (ExitedPad == EHPad);
387 }
388
389 if (ExitedOriginalPad)
390 return UnwindDestToken;
391
392 // Continue the search.
393 }
394
395 // No definitive information is contained within this funclet.
396 return nullptr;
397}
398
399/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
400/// return that pad instruction. If it unwinds to caller, return
401/// ConstantTokenNone. If it does not have a definitive unwind destination,
402/// return nullptr.
403///
404/// This routine gets invoked for calls in funclets in inlinees when inlining
405/// an invoke. Since many funclets don't have calls inside them, it's queried
406/// on-demand rather than building a map of pads to unwind dests up front.
407/// Determining a funclet's unwind dest may require recursively searching its
408/// descendants, and also ancestors and cousins if the descendants don't provide
409/// an answer. Since most funclets will have their unwind dest immediately
410/// available as the unwind dest of a catchswitch or cleanupret, this routine
411/// searches top-down from the given pad and then up. To avoid worst-case
412/// quadratic run-time given that approach, it uses a memo map to avoid
413/// re-processing funclet trees. The callers that rewrite the IR as they go
414/// take advantage of this, for correctness, by checking/forcing rewritten
415/// pads' entries to match the original callee view.
416static Value *getUnwindDestToken(Instruction *EHPad,
417 UnwindDestMemoTy &MemoMap) {
418 // Catchpads unwind to the same place as their catchswitch;
419 // redirct any queries on catchpads so the code below can
420 // deal with just catchswitches and cleanuppads.
421 if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad))
422 EHPad = CPI->getCatchSwitch();
423
424 // Check if we've already determined the unwind dest for this pad.
425 auto Memo = MemoMap.find(Val: EHPad);
426 if (Memo != MemoMap.end())
427 return Memo->second;
428
429 // Search EHPad and, if necessary, its descendants.
430 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
431 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
432 if (UnwindDestToken)
433 return UnwindDestToken;
434
435 // No information is available for this EHPad from itself or any of its
436 // descendants. An unwind all the way out to a pad in the caller would
437 // need also to agree with the unwind dest of the parent funclet, so
438 // search up the chain to try to find a funclet with information. Put
439 // null entries in the memo map to avoid re-processing as we go up.
440 MemoMap[EHPad] = nullptr;
441#ifndef NDEBUG
442 SmallPtrSet<Instruction *, 4> TempMemos;
443 TempMemos.insert(EHPad);
444#endif
445 Instruction *LastUselessPad = EHPad;
446 Value *AncestorToken;
447 for (AncestorToken = getParentPad(EHPad);
448 auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken);
449 AncestorToken = getParentPad(EHPad: AncestorToken)) {
450 // Skip over catchpads since they just follow their catchswitches.
451 if (isa<CatchPadInst>(Val: AncestorPad))
452 continue;
453 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
454 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
455 // call to getUnwindDestToken, that would mean that AncestorPad had no
456 // information in itself, its descendants, or its ancestors. If that
457 // were the case, then we should also have recorded the lack of information
458 // for the descendant that we're coming from. So assert that we don't
459 // find a null entry in the MemoMap for AncestorPad.
460 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
461 auto AncestorMemo = MemoMap.find(Val: AncestorPad);
462 if (AncestorMemo == MemoMap.end()) {
463 UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap);
464 } else {
465 UnwindDestToken = AncestorMemo->second;
466 }
467 if (UnwindDestToken)
468 break;
469 LastUselessPad = AncestorPad;
470 MemoMap[LastUselessPad] = nullptr;
471#ifndef NDEBUG
472 TempMemos.insert(LastUselessPad);
473#endif
474 }
475
476 // We know that getUnwindDestTokenHelper was called on LastUselessPad and
477 // returned nullptr (and likewise for EHPad and any of its ancestors up to
478 // LastUselessPad), so LastUselessPad has no information from below. Since
479 // getUnwindDestTokenHelper must investigate all downward paths through
480 // no-information nodes to prove that a node has no information like this,
481 // and since any time it finds information it records it in the MemoMap for
482 // not just the immediately-containing funclet but also any ancestors also
483 // exited, it must be the case that, walking downward from LastUselessPad,
484 // visiting just those nodes which have not been mapped to an unwind dest
485 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
486 // they are just used to keep getUnwindDestTokenHelper from repeating work),
487 // any node visited must have been exhaustively searched with no information
488 // for it found.
489 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
490 while (!Worklist.empty()) {
491 Instruction *UselessPad = Worklist.pop_back_val();
492 auto Memo = MemoMap.find(Val: UselessPad);
493 if (Memo != MemoMap.end() && Memo->second) {
494 // Here the name 'UselessPad' is a bit of a misnomer, because we've found
495 // that it is a funclet that does have information about unwinding to
496 // a particular destination; its parent was a useless pad.
497 // Since its parent has no information, the unwind edge must not escape
498 // the parent, and must target a sibling of this pad. This local unwind
499 // gives us no information about EHPad. Leave it and the subtree rooted
500 // at it alone.
501 assert(getParentPad(Memo->second) == getParentPad(UselessPad));
502 continue;
503 }
504 // We know we don't have information for UselesPad. If it has an entry in
505 // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
506 // added on this invocation of getUnwindDestToken; if a previous invocation
507 // recorded nullptr, it would have had to prove that the ancestors of
508 // UselessPad, which include LastUselessPad, had no information, and that
509 // in turn would have required proving that the descendants of
510 // LastUselesPad, which include EHPad, have no information about
511 // LastUselessPad, which would imply that EHPad was mapped to nullptr in
512 // the MemoMap on that invocation, which isn't the case if we got here.
513 assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
514 // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
515 // information that we'd be contradicting by making a map entry for it
516 // (which is something that getUnwindDestTokenHelper must have proved for
517 // us to get here). Just assert on is direct users here; the checks in
518 // this downward walk at its descendants will verify that they don't have
519 // any unwind edges that exit 'UselessPad' either (i.e. they either have no
520 // unwind edges or unwind to a sibling).
521 MemoMap[UselessPad] = UnwindDestToken;
522 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) {
523 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
524 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
525 auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
526 for (User *U : CatchPad->users()) {
527 assert((!isa<InvokeInst>(U) ||
528 (getParentPad(&*cast<InvokeInst>(U)
529 ->getUnwindDest()
530 ->getFirstNonPHIIt()) == CatchPad)) &&
531 "Expected useless pad");
532 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
533 Worklist.push_back(Elt: cast<Instruction>(Val: U));
534 }
535 }
536 } else {
537 assert(isa<CleanupPadInst>(UselessPad));
538 for (User *U : UselessPad->users()) {
539 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
540 assert(
541 (!isa<InvokeInst>(U) ||
542 (getParentPad(
543 &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
544 UselessPad)) &&
545 "Expected useless pad");
546 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
547 Worklist.push_back(Elt: cast<Instruction>(Val: U));
548 }
549 }
550 }
551
552 return UnwindDestToken;
553}
554
555/// When we inline a basic block into an invoke,
556/// we have to turn all of the calls that can throw into invokes.
557/// This function analyze BB to see if there are any calls, and if so,
558/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
559/// nodes in that block with the values specified in InvokeDestPHIValues.
560static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
561 BasicBlock *BB, BasicBlock *UnwindEdge,
562 SmallSetVector<const Value *, 4> &OriginallyIndirectCalls,
563 UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
564 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
565 // We only need to check for function calls: inlined invoke
566 // instructions require no special handling.
567 CallInst *CI = dyn_cast<CallInst>(Val: &I);
568
569 if (!CI || CI->doesNotThrow())
570 continue;
571
572 // We do not need to (and in fact, cannot) convert possibly throwing calls
573 // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
574 // invokes. The caller's "segment" of the deoptimization continuation
575 // attached to the newly inlined @llvm.experimental_deoptimize
576 // (resp. @llvm.experimental.guard) call should contain the exception
577 // handling logic, if any.
578 if (auto *F = CI->getCalledFunction())
579 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
580 F->getIntrinsicID() == Intrinsic::experimental_guard)
581 continue;
582
583 if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) {
584 // This call is nested inside a funclet. If that funclet has an unwind
585 // destination within the inlinee, then unwinding out of this call would
586 // be UB. Rewriting this call to an invoke which targets the inlined
587 // invoke's unwind dest would give the call's parent funclet multiple
588 // unwind destinations, which is something that subsequent EH table
589 // generation can't handle and that the veirifer rejects. So when we
590 // see such a call, leave it as a call.
591 auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]);
592 Value *UnwindDestToken =
593 getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap);
594 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
595 continue;
596#ifndef NDEBUG
597 Instruction *MemoKey;
598 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
599 MemoKey = CatchPad->getCatchSwitch();
600 else
601 MemoKey = FuncletPad;
602 assert(FuncletUnwindMap->count(MemoKey) &&
603 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
604 "must get memoized to avoid confusing later searches");
605#endif // NDEBUG
606 }
607
608 bool WasIndirect = OriginallyIndirectCalls.remove(X: CI);
609 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
610 if (WasIndirect)
611 OriginallyIndirectCalls.insert(X: BB->getTerminator());
612 return BB;
613 }
614 return nullptr;
615}
616
617/// If we inlined an invoke site, we need to convert calls
618/// in the body of the inlined function into invokes.
619///
620/// II is the invoke instruction being inlined. FirstNewBlock is the first
621/// block of the inlined code (the last block is the end of the function),
622/// and InlineCodeInfo is information about the code that got inlined.
623static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
624 ClonedCodeInfo &InlinedCodeInfo) {
625 BasicBlock *InvokeDest = II->getUnwindDest();
626
627 Function *Caller = FirstNewBlock->getParent();
628
629 // The inlined code is currently at the end of the function, scan from the
630 // start of the inlined code to its end, checking for stuff we need to
631 // rewrite.
632 LandingPadInliningInfo Invoke(II);
633
634 // Get all of the inlined landing pad instructions.
635 SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
636 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
637 I != E; ++I)
638 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator()))
639 InlinedLPads.insert(Ptr: II->getLandingPadInst());
640
641 // Append the clauses from the outer landing pad instruction into the inlined
642 // landing pad instructions.
643 LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
644 for (LandingPadInst *InlinedLPad : InlinedLPads) {
645 unsigned OuterNum = OuterLPad->getNumClauses();
646 InlinedLPad->reserveClauses(Size: OuterNum);
647 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
648 InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx));
649 if (OuterLPad->isCleanup())
650 InlinedLPad->setCleanup(true);
651 }
652
653 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
654 BB != E; ++BB) {
655 if (InlinedCodeInfo.ContainsCalls)
656 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
657 BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest(),
658 OriginallyIndirectCalls&: InlinedCodeInfo.OriginallyIndirectCalls))
659 // Update any PHI nodes in the exceptional block to indicate that there
660 // is now a new entry in them.
661 Invoke.addIncomingPHIValuesFor(BB: NewBB);
662
663 // Forward any resumes that are remaining here.
664 if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator()))
665 Invoke.forwardResume(RI, InlinedLPads);
666 }
667
668 // Now that everything is happy, we have one final detail. The PHI nodes in
669 // the exception destination block still have entries due to the original
670 // invoke instruction. Eliminate these entries (which might even delete the
671 // PHI node) now.
672 InvokeDest->removePredecessor(Pred: II->getParent());
673}
674
675/// If we inlined an invoke site, we need to convert calls
676/// in the body of the inlined function into invokes.
677///
678/// II is the invoke instruction being inlined. FirstNewBlock is the first
679/// block of the inlined code (the last block is the end of the function),
680/// and InlineCodeInfo is information about the code that got inlined.
681static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
682 ClonedCodeInfo &InlinedCodeInfo) {
683 BasicBlock *UnwindDest = II->getUnwindDest();
684 Function *Caller = FirstNewBlock->getParent();
685
686 assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");
687
688 // If there are PHI nodes in the unwind destination block, we need to keep
689 // track of which values came into them from the invoke before removing the
690 // edge from this block.
691 SmallVector<Value *, 8> UnwindDestPHIValues;
692 BasicBlock *InvokeBB = II->getParent();
693 for (PHINode &PHI : UnwindDest->phis()) {
694 // Save the value to use for this edge.
695 UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB));
696 }
697
698 // Add incoming-PHI values to the unwind destination block for the given basic
699 // block, using the values for the original invoke's source block.
700 auto UpdatePHINodes = [&](BasicBlock *Src) {
701 BasicBlock::iterator I = UnwindDest->begin();
702 for (Value *V : UnwindDestPHIValues) {
703 PHINode *PHI = cast<PHINode>(Val&: I);
704 PHI->addIncoming(V, BB: Src);
705 ++I;
706 }
707 };
708
709 // This connects all the instructions which 'unwind to caller' to the invoke
710 // destination.
711 UnwindDestMemoTy FuncletUnwindMap;
712 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
713 BB != E; ++BB) {
714 if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) {
715 if (CRI->unwindsToCaller()) {
716 auto *CleanupPad = CRI->getCleanupPad();
717 CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator());
718 CRI->eraseFromParent();
719 UpdatePHINodes(&*BB);
720 // Finding a cleanupret with an unwind destination would confuse
721 // subsequent calls to getUnwindDestToken, so map the cleanuppad
722 // to short-circuit any such calls and recognize this as an "unwind
723 // to caller" cleanup.
724 assert(!FuncletUnwindMap.count(CleanupPad) ||
725 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
726 FuncletUnwindMap[CleanupPad] =
727 ConstantTokenNone::get(Context&: Caller->getContext());
728 }
729 }
730
731 BasicBlock::iterator I = BB->getFirstNonPHIIt();
732 if (!I->isEHPad())
733 continue;
734
735 Instruction *Replacement = nullptr;
736 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
737 if (CatchSwitch->unwindsToCaller()) {
738 Value *UnwindDestToken;
739 if (auto *ParentPad =
740 dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) {
741 // This catchswitch is nested inside another funclet. If that
742 // funclet has an unwind destination within the inlinee, then
743 // unwinding out of this catchswitch would be UB. Rewriting this
744 // catchswitch to unwind to the inlined invoke's unwind dest would
745 // give the parent funclet multiple unwind destinations, which is
746 // something that subsequent EH table generation can't handle and
747 // that the veirifer rejects. So when we see such a call, leave it
748 // as "unwind to caller".
749 UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap);
750 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
751 continue;
752 } else {
753 // This catchswitch has no parent to inherit constraints from, and
754 // none of its descendants can have an unwind edge that exits it and
755 // targets another funclet in the inlinee. It may or may not have a
756 // descendant that definitively has an unwind to caller. In either
757 // case, we'll have to assume that any unwinds out of it may need to
758 // be routed to the caller, so treat it as though it has a definitive
759 // unwind to caller.
760 UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext());
761 }
762 auto *NewCatchSwitch = CatchSwitchInst::Create(
763 ParentPad: CatchSwitch->getParentPad(), UnwindDest,
764 NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(),
765 InsertBefore: CatchSwitch->getIterator());
766 for (BasicBlock *PadBB : CatchSwitch->handlers())
767 NewCatchSwitch->addHandler(Dest: PadBB);
768 // Propagate info for the old catchswitch over to the new one in
769 // the unwind map. This also serves to short-circuit any subsequent
770 // checks for the unwind dest of this catchswitch, which would get
771 // confused if they found the outer handler in the callee.
772 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
773 Replacement = NewCatchSwitch;
774 }
775 } else if (!isa<FuncletPadInst>(Val: I)) {
776 llvm_unreachable("unexpected EHPad!");
777 }
778
779 if (Replacement) {
780 Replacement->takeName(V: &*I);
781 I->replaceAllUsesWith(V: Replacement);
782 I->eraseFromParent();
783 UpdatePHINodes(&*BB);
784 }
785 }
786
787 if (InlinedCodeInfo.ContainsCalls)
788 for (Function::iterator BB = FirstNewBlock->getIterator(),
789 E = Caller->end();
790 BB != E; ++BB)
791 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
792 BB: &*BB, UnwindEdge: UnwindDest, OriginallyIndirectCalls&: InlinedCodeInfo.OriginallyIndirectCalls,
793 FuncletUnwindMap: &FuncletUnwindMap))
794 // Update any PHI nodes in the exceptional block to indicate that there
795 // is now a new entry in them.
796 UpdatePHINodes(NewBB);
797
798 // Now that everything is happy, we have one final detail. The PHI nodes in
799 // the exception destination block still have entries due to the original
800 // invoke instruction. Eliminate these entries (which might even delete the
801 // PHI node) now.
802 UnwindDest->removePredecessor(Pred: InvokeBB);
803}
804
805static bool haveCommonPrefix(MDNode *MIBStackContext,
806 MDNode *CallsiteStackContext) {
807 assert(MIBStackContext->getNumOperands() > 0 &&
808 CallsiteStackContext->getNumOperands() > 0);
809 // Because of the context trimming performed during matching, the callsite
810 // context could have more stack ids than the MIB. We match up to the end of
811 // the shortest stack context.
812 for (auto MIBStackIter = MIBStackContext->op_begin(),
813 CallsiteStackIter = CallsiteStackContext->op_begin();
814 MIBStackIter != MIBStackContext->op_end() &&
815 CallsiteStackIter != CallsiteStackContext->op_end();
816 MIBStackIter++, CallsiteStackIter++) {
817 auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter);
818 auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter);
819 assert(Val1 && Val2);
820 if (Val1->getZExtValue() != Val2->getZExtValue())
821 return false;
822 }
823 return true;
824}
825
826static void removeMemProfMetadata(CallBase *Call) {
827 Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
828}
829
830static void removeCallsiteMetadata(CallBase *Call) {
831 Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
832}
833
834static void updateMemprofMetadata(CallBase *CI,
835 const std::vector<Metadata *> &MIBList,
836 OptimizationRemarkEmitter *ORE) {
837 assert(!MIBList.empty());
838 // Remove existing memprof, which will either be replaced or may not be needed
839 // if we are able to use a single allocation type function attribute.
840 removeMemProfMetadata(Call: CI);
841 CallStackTrie CallStack(ORE);
842 for (Metadata *MIB : MIBList)
843 CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB));
844 bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
845 assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
846 if (!MemprofMDAttached)
847 // If we used a function attribute remove the callsite metadata as well.
848 removeCallsiteMetadata(Call: CI);
849}
850
851// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
852// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
853// the call that was inlined.
854static void propagateMemProfHelper(const CallBase *OrigCall,
855 CallBase *ClonedCall,
856 MDNode *InlinedCallsiteMD,
857 OptimizationRemarkEmitter *ORE) {
858 MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite);
859 MDNode *ClonedCallsiteMD = nullptr;
860 // Check if the call originally had callsite metadata, and update it for the
861 // new call in the inlined body.
862 if (OrigCallsiteMD) {
863 // The cloned call's context is now the concatenation of the original call's
864 // callsite metadata and the callsite metadata on the call where it was
865 // inlined.
866 ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD);
867 ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD);
868 }
869
870 // Update any memprof metadata on the cloned call.
871 MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof);
872 if (!OrigMemProfMD)
873 return;
874 // We currently expect that allocations with memprof metadata also have
875 // callsite metadata for the allocation's part of the context.
876 assert(OrigCallsiteMD);
877
878 // New call's MIB list.
879 std::vector<Metadata *> NewMIBList;
880
881 // For each MIB metadata, check if its call stack context starts with the
882 // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
883 // the inlined body. If not, it stays on the out-of-line original call.
884 for (auto &MIBOp : OrigMemProfMD->operands()) {
885 MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp);
886 // Stack is first operand of MIB.
887 MDNode *StackMD = getMIBStackNode(MIB);
888 assert(StackMD);
889 // See if the new cloned callsite context matches this profiled context.
890 if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD))
891 // Add it to the cloned call's MIB list.
892 NewMIBList.push_back(x: MIB);
893 }
894 if (NewMIBList.empty()) {
895 removeMemProfMetadata(Call: ClonedCall);
896 removeCallsiteMetadata(Call: ClonedCall);
897 return;
898 }
899 if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
900 updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE);
901}
902
903// Update memprof related metadata (!memprof and !callsite) based on the
904// inlining of Callee into the callsite at CB. The updates include merging the
905// inlined callee's callsite metadata with that of the inlined call,
906// and moving the subset of any memprof contexts to the inlined callee
907// allocations if they match the new inlined call stack.
908static void
909propagateMemProfMetadata(Function *Callee, CallBase &CB,
910 bool ContainsMemProfMetadata,
911 const ValueMap<const Value *, WeakTrackingVH> &VMap,
912 OptimizationRemarkEmitter *ORE) {
913 MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite);
914 // Only need to update if the inlined callsite had callsite metadata, or if
915 // there was any memprof metadata inlined.
916 if (!CallsiteMD && !ContainsMemProfMetadata)
917 return;
918
919 // Propagate metadata onto the cloned calls in the inlined callee.
920 for (const auto &Entry : VMap) {
921 // See if this is a call that has been inlined and remapped, and not
922 // simplified away in the process.
923 auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first);
924 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second);
925 if (!OrigCall || !ClonedCall)
926 continue;
927 // If the inlined callsite did not have any callsite metadata, then it isn't
928 // involved in any profiled call contexts, and we can remove any memprof
929 // metadata on the cloned call.
930 if (!CallsiteMD) {
931 removeMemProfMetadata(Call: ClonedCall);
932 removeCallsiteMetadata(Call: ClonedCall);
933 continue;
934 }
935 propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE);
936 }
937}
938
939/// Collect all calls that produce RetVal, following only pointer-preserving
940/// instructions (cast, phi, select).
941static void collectPointerReturningCalls(Value *RetVal,
942 SmallVectorImpl<CallBase *> &Out) {
943 SmallVector<Value *, 8> Worklist{RetVal};
944 SmallPtrSet<Value *, 8> Visited;
945 while (!Worklist.empty()) {
946 Value *V = Worklist.pop_back_val();
947 if (!V->getType()->isPointerTy() || !Visited.insert(Ptr: V).second)
948 continue;
949 if (auto *CB = dyn_cast<CallBase>(Val: V))
950 Out.push_back(Elt: CB);
951 else if (isa<BitCastInst, AddrSpaceCastInst>(Val: V))
952 Worklist.push_back(Elt: cast<CastInst>(Val: V)->getOperand(i_nocapture: 0));
953 else if (auto *PN = dyn_cast<PHINode>(Val: V))
954 append_range(C&: Worklist, R: PN->incoming_values());
955 else if (auto *SI = dyn_cast<SelectInst>(Val: V)) {
956 Worklist.push_back(Elt: SI->getTrueValue());
957 Worklist.push_back(Elt: SI->getFalseValue());
958 }
959 }
960}
961
962/// When inlining a call that carries !alloc_token metadata, propagate that
963/// metadata onto calls exposed by inlining the wrapper body. Propagation is
964/// restricted to return-value producing calls, which avoids instrumenting
965/// unrelated calls in the wrapper body.
966static void
967propagateAllocTokenMetadata(Function *CalledFunc, CallBase &CB,
968 const ValueMap<const Value *, WeakTrackingVH> &VMap,
969 ClonedCodeInfo &InlinedFunctionInfo) {
970 MDNode *AllocTokenMD = CB.getMetadata(KindID: LLVMContext::MD_alloc_token);
971 if (!AllocTokenMD)
972 return;
973
974 SmallVector<CallBase *, 2> AllocCalls;
975 for (BasicBlock &BB : *CalledFunc)
976 if (auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()))
977 if (Value *RV = RI->getReturnValue())
978 collectPointerReturningCalls(RetVal: RV, Out&: AllocCalls);
979
980 for (CallBase *OrigCall : AllocCalls) {
981 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: OrigCall));
982 if (!ClonedCall)
983 continue;
984 // Skip calls simplified during inlining; propagation may be incorrect.
985 if (InlinedFunctionInfo.isSimplified(From: OrigCall, To: ClonedCall))
986 continue;
987 // Fill missing only: never overwrite a more specific token the wrapper
988 // already set on an internal allocation.
989 if (ClonedCall->getMetadata(KindID: LLVMContext::MD_alloc_token))
990 continue;
991 ClonedCall->setMetadata(KindID: LLVMContext::MD_alloc_token, Node: AllocTokenMD);
992 }
993}
994
995/// When inlining a call site that has !llvm.mem.parallel_loop_access,
996/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
997/// be propagated to all memory-accessing cloned instructions.
998static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
999 Function::iterator FEnd) {
1000 MDNode *MemParallelLoopAccess =
1001 CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
1002 MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group);
1003 MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope);
1004 MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias);
1005 if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
1006 return;
1007
1008 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1009 for (Instruction &I : BB) {
1010 // This metadata is only relevant for instructions that access memory.
1011 if (!I.mayReadOrWriteMemory())
1012 continue;
1013
1014 if (MemParallelLoopAccess) {
1015 // TODO: This probably should not overwrite MemParalleLoopAccess.
1016 MemParallelLoopAccess = MDNode::concatenate(
1017 A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access),
1018 B: MemParallelLoopAccess);
1019 I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access,
1020 Node: MemParallelLoopAccess);
1021 }
1022
1023 if (AccessGroup)
1024 I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups(
1025 AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup));
1026
1027 if (AliasScope)
1028 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate(
1029 A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope));
1030
1031 if (NoAlias)
1032 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate(
1033 A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias));
1034 }
1035 }
1036}
1037
1038/// Track inlining chain via inlined.from metadata for dontcall diagnostics.
1039static void PropagateInlinedFromMetadata(CallBase &CB, StringRef CalledFuncName,
1040 StringRef CallerFuncName,
1041 Function::iterator FStart,
1042 Function::iterator FEnd) {
1043 LLVMContext &Ctx = CB.getContext();
1044 uint64_t InlineSiteLoc = 0;
1045 if (auto *MD = CB.getMetadata(Kind: "srcloc"))
1046 if (auto *CI = mdconst::dyn_extract<ConstantInt>(MD: MD->getOperand(I: 0)))
1047 InlineSiteLoc = CI->getZExtValue();
1048
1049 auto *I64Ty = Type::getInt64Ty(C&: Ctx);
1050 auto MakeMDInt = [&](uint64_t V) {
1051 return ConstantAsMetadata::get(C: ConstantInt::get(Ty: I64Ty, V));
1052 };
1053
1054 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1055 for (Instruction &I : BB) {
1056 auto *CI = dyn_cast<CallInst>(Val: &I);
1057 if (!CI || !CI->getMetadata(Kind: "srcloc"))
1058 continue;
1059 auto *Callee = CI->getCalledFunction();
1060 if (!Callee || (!Callee->hasFnAttribute(Kind: "dontcall-error") &&
1061 !Callee->hasFnAttribute(Kind: "dontcall-warn")))
1062 continue;
1063
1064 SmallVector<Metadata *, 8> Ops;
1065 if (MDNode *Existing = CI->getMetadata(Kind: "inlined.from"))
1066 append_range(C&: Ops, R: Existing->operands());
1067 else {
1068 Ops.push_back(Elt: MDString::get(Context&: Ctx, Str: CalledFuncName));
1069 Ops.push_back(Elt: MakeMDInt(0));
1070 }
1071 Ops.push_back(Elt: MDString::get(Context&: Ctx, Str: CallerFuncName));
1072 Ops.push_back(Elt: MakeMDInt(InlineSiteLoc));
1073 CI->setMetadata(Kind: "inlined.from", Node: MDNode::get(Context&: Ctx, MDs: Ops));
1074 }
1075 }
1076}
1077
1078/// Bundle operands of the inlined function must be added to inlined call sites.
1079static void PropagateOperandBundles(Function::iterator InlinedBB,
1080 Instruction *CallSiteEHPad) {
1081 for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) {
1082 CallBase *I = dyn_cast<CallBase>(Val: &II);
1083 if (!I)
1084 continue;
1085 // Skip call sites which already have a "funclet" bundle.
1086 if (I->getOperandBundle(ID: LLVMContext::OB_funclet))
1087 continue;
1088 // Skip call sites which are nounwind intrinsics (as long as they don't
1089 // lower into regular function calls in the course of IR transformations).
1090 auto *CalledFn =
1091 dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts());
1092 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
1093 !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID()))
1094 continue;
1095
1096 SmallVector<OperandBundleDef, 1> OpBundles;
1097 I->getOperandBundlesAsDefs(Defs&: OpBundles);
1098 OpBundles.emplace_back(Args: "funclet", Args&: CallSiteEHPad);
1099
1100 Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator());
1101 NewInst->takeName(V: I);
1102 I->replaceAllUsesWith(V: NewInst);
1103 I->eraseFromParent();
1104 }
1105}
1106
1107namespace {
1108/// Utility for cloning !noalias and !alias.scope metadata. When a code region
1109/// using scoped alias metadata is inlined, the aliasing relationships may not
1110/// hold between the two version. It is necessary to create a deep clone of the
1111/// metadata, putting the two versions in separate scope domains.
1112class ScopedAliasMetadataDeepCloner {
1113 using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
1114 SetVector<const MDNode *> MD;
1115 MetadataMap MDMap;
1116 void addRecursiveMetadataUses();
1117
1118public:
1119 ScopedAliasMetadataDeepCloner(const Function *F);
1120
1121 /// Create a new clone of the scoped alias metadata, which will be used by
1122 /// subsequent remap() calls.
1123 void clone();
1124
1125 /// Remap instructions in the given range from the original to the cloned
1126 /// metadata.
1127 void remap(Function::iterator FStart, Function::iterator FEnd);
1128};
1129} // namespace
1130
1131ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1132 const Function *F) {
1133 for (const BasicBlock &BB : *F) {
1134 for (const Instruction &I : BB) {
1135 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1136 MD.insert(X: M);
1137 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1138 MD.insert(X: M);
1139
1140 // We also need to clone the metadata in noalias intrinsics.
1141 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1142 MD.insert(X: Decl->getScopeList());
1143 }
1144 }
1145 addRecursiveMetadataUses();
1146}
1147
1148void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1149 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1150 while (!Queue.empty()) {
1151 const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val());
1152 for (const Metadata *Op : M->operands())
1153 if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op))
1154 if (MD.insert(X: OpMD))
1155 Queue.push_back(Elt: OpMD);
1156 }
1157}
1158
1159void ScopedAliasMetadataDeepCloner::clone() {
1160 assert(MDMap.empty() && "clone() already called ?");
1161
1162 SmallVector<TempMDTuple, 16> DummyNodes;
1163 for (const MDNode *I : MD) {
1164 DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {}));
1165 MDMap[I].reset(MD: DummyNodes.back().get());
1166 }
1167
1168 // Create new metadata nodes to replace the dummy nodes, replacing old
1169 // metadata references with either a dummy node or an already-created new
1170 // node.
1171 SmallVector<Metadata *, 4> NewOps;
1172 for (const MDNode *I : MD) {
1173 for (const Metadata *Op : I->operands()) {
1174 if (const MDNode *M = dyn_cast<MDNode>(Val: Op))
1175 NewOps.push_back(Elt: MDMap[M]);
1176 else
1177 NewOps.push_back(Elt: const_cast<Metadata *>(Op));
1178 }
1179
1180 MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps);
1181 MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]);
1182 assert(TempM->isTemporary() && "Expected temporary node");
1183
1184 TempM->replaceAllUsesWith(MD: NewM);
1185 NewOps.clear();
1186 }
1187}
1188
1189void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1190 Function::iterator FEnd) {
1191 if (MDMap.empty())
1192 return; // Nothing to do.
1193
1194 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1195 for (Instruction &I : BB) {
1196 // TODO: The null checks for the MDMap.lookup() results should no longer
1197 // be necessary.
1198 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1199 if (MDNode *MNew = MDMap.lookup(Val: M))
1200 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew);
1201
1202 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1203 if (MDNode *MNew = MDMap.lookup(Val: M))
1204 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew);
1205
1206 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1207 if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList()))
1208 Decl->setScopeList(MNew);
1209 }
1210 }
1211}
1212
1213/// If the inlined function has noalias arguments,
1214/// then add new alias scopes for each noalias argument, tag the mapped noalias
1215/// parameters with noalias metadata specifying the new scope, and tag all
1216/// non-derived loads, stores and memory intrinsics with the new alias scopes.
1217static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1218 const DataLayout &DL, AAResults *CalleeAAR,
1219 ClonedCodeInfo &InlinedFunctionInfo) {
1220 if (!EnableNoAliasConversion)
1221 return;
1222
1223 const Function *CalledFunc = CB.getCalledFunction();
1224 SmallVector<const Argument *, 4> NoAliasArgs;
1225
1226 for (const Argument &Arg : CalledFunc->args())
1227 if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty())
1228 NoAliasArgs.push_back(Elt: &Arg);
1229
1230 if (NoAliasArgs.empty())
1231 return;
1232
1233 // To do a good job, if a noalias variable is captured, we need to know if
1234 // the capture point dominates the particular use we're considering.
1235 DominatorTree DT;
1236 DT.recalculate(Func&: const_cast<Function&>(*CalledFunc));
1237
1238 // noalias indicates that pointer values based on the argument do not alias
1239 // pointer values which are not based on it. So we add a new "scope" for each
1240 // noalias function argument. Accesses using pointers based on that argument
1241 // become part of that alias scope, accesses using pointers not based on that
1242 // argument are tagged as noalias with that scope.
1243
1244 DenseMap<const Argument *, MDNode *> NewScopes;
1245 MDBuilder MDB(CalledFunc->getContext());
1246
1247 // Create a new scope domain for this function.
1248 MDNode *NewDomain =
1249 MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName());
1250 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
1251 const Argument *A = NoAliasArgs[i];
1252
1253 std::string Name = std::string(CalledFunc->getName());
1254 if (A->hasName()) {
1255 Name += ": %";
1256 Name += A->getName();
1257 } else {
1258 Name += ": argument ";
1259 Name += utostr(X: i);
1260 }
1261
1262 // Note: We always create a new anonymous root here. This is true regardless
1263 // of the linkage of the callee because the aliasing "scope" is not just a
1264 // property of the callee, but also all control dependencies in the caller.
1265 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
1266 NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope));
1267
1268 if (UseNoAliasIntrinsic) {
1269 // Introduce a llvm.experimental.noalias.scope.decl for the noalias
1270 // argument.
1271 MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope);
1272 auto *NoAliasDecl =
1273 IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList);
1274 // Ignore the result for now. The result will be used when the
1275 // llvm.noalias intrinsic is introduced.
1276 (void)NoAliasDecl;
1277 }
1278 }
1279
1280 // Iterate over all new instructions in the map; for all memory-access
1281 // instructions, add the alias scope metadata.
1282 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1283 VMI != VMIE; ++VMI) {
1284 if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) {
1285 if (!VMI->second)
1286 continue;
1287
1288 Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second);
1289 if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI))
1290 continue;
1291
1292 bool IsArgMemOnlyCall = false, IsFuncCall = false;
1293 SmallVector<const Value *, 2> PtrArgs;
1294
1295 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
1296 PtrArgs.push_back(Elt: LI->getPointerOperand());
1297 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1298 PtrArgs.push_back(Elt: SI->getPointerOperand());
1299 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I))
1300 PtrArgs.push_back(Elt: VAAI->getPointerOperand());
1301 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I))
1302 PtrArgs.push_back(Elt: CXI->getPointerOperand());
1303 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I))
1304 PtrArgs.push_back(Elt: RMWI->getPointerOperand());
1305 else if (const auto *Call = dyn_cast<CallBase>(Val: I)) {
1306 // If we know that the call does not access memory, then we'll still
1307 // know that about the inlined clone of this call site, and we don't
1308 // need to add metadata.
1309 if (Call->doesNotAccessMemory())
1310 continue;
1311
1312 IsFuncCall = true;
1313 if (CalleeAAR) {
1314 MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
1315
1316 // We'll retain this knowledge without additional metadata.
1317 if (ME.onlyAccessesInaccessibleMem())
1318 continue;
1319
1320 if (ME.onlyAccessesArgPointees())
1321 IsArgMemOnlyCall = true;
1322 }
1323
1324 for (Value *Arg : Call->args()) {
1325 // Only care about pointer arguments. If a noalias argument is
1326 // accessed through a non-pointer argument, it must be captured
1327 // first (e.g. via ptrtoint), and we protect against captures below.
1328 if (!Arg->getType()->isPointerTy())
1329 continue;
1330
1331 PtrArgs.push_back(Elt: Arg);
1332 }
1333 }
1334
1335 // If we found no pointers, then this instruction is not suitable for
1336 // pairing with an instruction to receive aliasing metadata.
1337 // However, if this is a call, this we might just alias with none of the
1338 // noalias arguments.
1339 if (PtrArgs.empty() && !IsFuncCall)
1340 continue;
1341
1342 // It is possible that there is only one underlying object, but you
1343 // need to go through several PHIs to see it, and thus could be
1344 // repeated in the Objects list.
1345 SmallPtrSet<const Value *, 4> ObjSet;
1346 SmallVector<Metadata *, 4> Scopes, NoAliases;
1347
1348 for (const Value *V : PtrArgs) {
1349 SmallVector<const Value *, 4> Objects;
1350 getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
1351
1352 ObjSet.insert_range(R&: Objects);
1353 }
1354
1355 // Figure out if we're derived from anything that is not a noalias
1356 // argument.
1357 bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
1358 UsesUnknownObject = false;
1359 for (const Value *V : ObjSet) {
1360 // Is this value a constant that cannot be derived from any pointer
1361 // value (we need to exclude constant expressions, for example, that
1362 // are formed from arithmetic on global symbols).
1363 bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) ||
1364 isa<ConstantPointerNull>(Val: V) ||
1365 isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V);
1366 if (IsNonPtrConst)
1367 continue;
1368
1369 // If this is anything other than a noalias argument, then we cannot
1370 // completely describe the aliasing properties using alias.scope
1371 // metadata (and, thus, won't add any).
1372 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
1373 if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias))
1374 UsesAliasingPtr = true;
1375 } else {
1376 UsesAliasingPtr = true;
1377 }
1378
1379 if (isEscapeSource(V)) {
1380 // An escape source can only alias with a noalias argument if it has
1381 // been captured beforehand.
1382 RequiresNoCaptureBefore = true;
1383 } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) {
1384 // If this is neither an escape source, nor some identified object
1385 // (which cannot directly alias a noalias argument), nor some other
1386 // argument (which, by definition, also cannot alias a noalias
1387 // argument), conservatively do not make any assumptions.
1388 UsesUnknownObject = true;
1389 }
1390 }
1391
1392 // Nothing we can do if the used underlying object cannot be reliably
1393 // determined.
1394 if (UsesUnknownObject)
1395 continue;
1396
1397 // A function call can always get captured noalias pointers (via other
1398 // parameters, globals, etc.).
1399 if (IsFuncCall && !IsArgMemOnlyCall)
1400 RequiresNoCaptureBefore = true;
1401
1402 // First, we want to figure out all of the sets with which we definitely
1403 // don't alias. Iterate over all noalias set, and add those for which:
1404 // 1. The noalias argument is not in the set of objects from which we
1405 // definitely derive.
1406 // 2. The noalias argument has not yet been captured.
1407 // An arbitrary function that might load pointers could see captured
1408 // noalias arguments via other noalias arguments or globals, and so we
1409 // must always check for prior capture.
1410 for (const Argument *A : NoAliasArgs) {
1411 if (ObjSet.contains(Ptr: A))
1412 continue; // May be based on a noalias argument.
1413
1414 // It might be tempting to skip the PointerMayBeCapturedBefore check if
1415 // A->hasNoCaptureAttr() is true, but this is incorrect because
1416 // nocapture only guarantees that no copies outlive the function, not
1417 // that the value cannot be locally captured.
1418 if (!RequiresNoCaptureBefore ||
1419 !capturesAnything(CC: PointerMayBeCapturedBefore(
1420 V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false,
1421 Mask: CaptureComponents::Provenance)))
1422 NoAliases.push_back(Elt: NewScopes[A]);
1423 }
1424
1425 if (!NoAliases.empty())
1426 NI->setMetadata(KindID: LLVMContext::MD_noalias,
1427 Node: MDNode::concatenate(
1428 A: NI->getMetadata(KindID: LLVMContext::MD_noalias),
1429 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases)));
1430
1431 // Next, we want to figure out all of the sets to which we might belong.
1432 // We might belong to a set if the noalias argument is in the set of
1433 // underlying objects. If there is some non-noalias argument in our list
1434 // of underlying objects, then we cannot add a scope because the fact
1435 // that some access does not alias with any set of our noalias arguments
1436 // cannot itself guarantee that it does not alias with this access
1437 // (because there is some pointer of unknown origin involved and the
1438 // other access might also depend on this pointer). We also cannot add
1439 // scopes to arbitrary functions unless we know they don't access any
1440 // non-parameter pointer-values.
1441 bool CanAddScopes = !UsesAliasingPtr;
1442 if (CanAddScopes && IsFuncCall)
1443 CanAddScopes = IsArgMemOnlyCall;
1444
1445 if (CanAddScopes)
1446 for (const Argument *A : NoAliasArgs) {
1447 if (ObjSet.count(Ptr: A))
1448 Scopes.push_back(Elt: NewScopes[A]);
1449 }
1450
1451 if (!Scopes.empty())
1452 NI->setMetadata(
1453 KindID: LLVMContext::MD_alias_scope,
1454 Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope),
1455 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes)));
1456 }
1457 }
1458}
1459
1460static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1461 ReturnInst *End) {
1462
1463 assert(Begin->getParent() == End->getParent() &&
1464 "Expected to be in same basic block!");
1465 auto BeginIt = Begin->getIterator();
1466 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1467 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1468 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1469}
1470
// Add attributes from CB params and Fn attributes that can always be propagated
// to the corresponding argument / inner callbases.
//
// \p CB is the call site being inlined. For every call-like instruction in the
// called function, its clone is looked up in \p VMap and, unless
// \p InlinedFunctionInfo reports the clone as simplified, the parameter
// attributes found on \p CB are merged into the clone's attribute list.
//
// Two groups of attributes are collected per call-site argument:
//  * "object" attributes (readnone / readonly), which are applied to any inner
//    argument whose underlying object is the corresponding callee Argument;
//  * "exact" attributes (ExactAttrsToPropagate), which are applied only when
//    the inner argument is the callee Argument itself.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
                                         ValueToValueMapTy &VMap,
                                         ClonedCodeInfo &InlinedFunctionInfo) {
  auto *CalledFunction = CB.getCalledFunction();
  auto &Context = CalledFunction->getContext();

  // Collect valid attributes for all params.
  // Both vectors are indexed by the argument number at the call site CB.
  SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
  bool HasAttrToPropagate = false;

  // Attributes we can only propagate if the exact parameter is forwarded.
  // We can propagate both poison generating and UB generating attributes
  // without any extra checks. The only attribute that is tricky to propagate
  // is `noundef` (skipped for now) as that can create new UB where previous
  // behavior was just using a poison value.
  static const Attribute::AttrKind ExactAttrsToPropagate[] = {
      Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
      Attribute::NonNull,         Attribute::NoFPClass,
      Attribute::Alignment,       Attribute::Range};

  for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
    // One (possibly empty) builder per call-site argument keeps the vectors
    // index-aligned with the argument numbers.
    ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
    ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
    // Access attributes can be propagated to any param with the same underlying
    // object as the argument.
    if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone))
      ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone);
    if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly))
      ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly);

    for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
      Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK);
      if (Attr.isValid())
        ValidExactParamAttrs.back().addAttribute(A: Attr);
    }

    HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
    HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
  }

  // Won't be able to propagate anything.
  if (!HasAttrToPropagate)
    return;

  // Walk the original callee body; the attributes are written to the clones
  // found through VMap, never to the callee's own instructions.
  for (BasicBlock &BB : *CalledFunction) {
    for (Instruction &Ins : BB) {
      const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins);
      if (!InnerCB)
        continue;
      // Skip calls that have no clone, or whose clone is no longer a call.
      auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB));
      if (!NewInnerCB)
        continue;
      // The InnerCB might have been simplified during the inlining
      // process which can make propagation incorrect.
      if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB))
        continue;

      // AL accumulates the updated attributes and is written back once after
      // all arguments of this inner call have been processed.
      AttributeList AL = NewInnerCB->getAttributes();
      for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
        // It's unsound or requires special handling to propagate
        // attributes to byval arguments. Even if CalledFunction
        // doesn't e.g. write to the argument (readonly), the call to
        // NewInnerCB may write to its by-value copy.
        if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal))
          continue;

        // Don't bother propagating attrs to constants.
        if (match(V: NewInnerCB->getArgOperand(i: I),
                  P: llvm::PatternMatch::m_ImmConstant()))
          continue;

        // Check if the underlying value for the parameter is an argument.
        // ArgNo is assigned on every path that reaches its use below; the
        // remaining paths `continue`.
        const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I));
        unsigned ArgNo;
        if (Arg) {
          ArgNo = Arg->getArgNo();
          // For dereferenceable, dereferenceable_or_null, align, etc...
          // we don't want to propagate if the existing param has the same
          // attribute with "better" constraints. So remove from the
          // new AL if the region of the existing param is larger than
          // what we can propagate.
          AttrBuilder NewAB{
              Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])};
          if (AL.getParamDereferenceableBytes(Index: I) >
              NewAB.getDereferenceableBytes())
            NewAB.removeAttribute(Val: Attribute::Dereferenceable);
          if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) >
              NewAB.getDereferenceableOrNullBytes())
            NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull);
          if (AL.getParamAlignment(ArgNo: I).valueOrOne() >
              NewAB.getAlignment().valueOrOne())
            NewAB.removeAttribute(Val: Attribute::Alignment);
          // When both sides carry a range, propagate their intersection.
          if (auto ExistingRange = AL.getParamRange(ArgNo: I)) {
            if (auto NewRange = NewAB.getRange()) {
              ConstantRange CombinedRange =
                  ExistingRange->intersectWith(CR: *NewRange);
              NewAB.removeAttribute(Val: Attribute::Range);
              NewAB.addRangeAttr(CR: CombinedRange);
            }
          }

          // nofpclass masks are combined with bitwise OR (union of the
          // excluded classes).
          if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I))
            NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass());

          AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB);
        } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) {
          // Check if the underlying value for the parameter is an argument.
          const Value *UnderlyingV =
              getUnderlyingObject(V: InnerCB->getArgOperand(i: I));
          Arg = dyn_cast<Argument>(Val: UnderlyingV);
          if (!Arg)
            continue;
          ArgNo = Arg->getArgNo();
        } else {
          continue;
        }

        // If so, propagate its access attributes.
        // This point is reached both for directly forwarded arguments and for
        // pointers merely based on an argument.
        AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]);

        // We can have conflicting attributes from the inner callsite and
        // to-be-inlined callsite. In that case, choose the most
        // restrictive.

        // readonly + writeonly means we can never deref so make readnone.
        if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) &&
            AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly))
          AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone);

        // If have readnone, need to clear readonly/writeonly
        if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) {
          AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly);
          AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly);
        }

        // Writable cannot exist in conjunction w/ readonly/readnone
        if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) ||
            AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone))
          AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable);
      }
      NewInnerCB->setAttributes(AL);
    }
  }
}
1617
// Only allow these allow-listed return attributes to be propagated from the
// inlined call site back onto calls inside the callee. Other attributes may
// only be valid on the call site itself, e.g. signext and zeroext.
1621
1622// Attributes that are always okay to propagate as if they are violated its
1623// immediate UB.
1624static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1625 AttrBuilder Valid(CB.getContext());
1626 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1627 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1628 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1629 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1630 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1631 Valid.addAttribute(Val: Attribute::NoAlias);
1632 if (CB.hasRetAttr(Kind: Attribute::NoUndef))
1633 Valid.addAttribute(Val: Attribute::NoUndef);
1634 return Valid;
1635}
1636
1637// Attributes that need additional checks as propagating them may change
1638// behavior or cause new UB.
1639static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1640 AttrBuilder Valid(CB.getContext());
1641 if (CB.hasRetAttr(Kind: Attribute::NonNull))
1642 Valid.addAttribute(Val: Attribute::NonNull);
1643 if (CB.hasRetAttr(Kind: Attribute::Alignment))
1644 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1645 if (std::optional<ConstantRange> Range = CB.getRange())
1646 Valid.addRangeAttr(CR: *Range);
1647 if (CB.hasRetAttr(Kind: Attribute::NoFPClass))
1648 Valid.addNoFPClassAttr(NoFPClassMask: CB.getRetNoFPClass());
1649 return Valid;
1650}
1651
// Propagate return attributes found on the call site \p CB onto the cloned
// calls that produce the callee's return value.
//
// For each `ret` in the called function whose operand is a call, the clone of
// that call is looked up in \p VMap. If the clone still is a call, was not
// simplified (per \p InlinedFunctionInfo), sits in the same block as the
// `ret`, and nothing between the two may throw or exit, the UB-generating
// attributes are merged in unconditionally and the poison-generating ones are
// merged in only when the noundef / single-use conditions below hold.
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
                                ClonedCodeInfo &InlinedFunctionInfo) {
  // These two builders are only used for the early exit; fresh ones are built
  // per return below because they are mutated there.
  AttrBuilder CallSiteValidUB = IdentifyValidUBGeneratingAttributes(CB);
  AttrBuilder CallSiteValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
  if (!CallSiteValidUB.hasAttributes() && !CallSiteValidPG.hasAttributes())
    return;
  auto *CalledFunction = CB.getCalledFunction();
  auto &Context = CalledFunction->getContext();

  for (auto &BB : *CalledFunction) {
    // Only blocks ending in a `ret` of a call result are of interest.
    auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
    if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
      continue;
    auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
    // Check that the cloned RetVal exists and is a call, otherwise we cannot
    // add the attributes on the cloned RetVal. Simplification during inlining
    // could have transformed the cloned instruction.
    auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal));
    if (!NewRetVal)
      continue;

    // The RetVal might have been simplified during the inlining
    // process which can make propagation incorrect.
    if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal))
      continue;
    // Backward propagation of attributes to the returned value may be incorrect
    // if it is control flow dependent.
    // Consider:
    // @callee {
    //  %rv = call @foo()
    //  %rv2 = call @bar()
    //  if (%rv2 != null)
    //    return %rv2
    //  if (%rv == null)
    //    exit()
    //  return %rv
    // }
    // caller() {
    //   %val = call nonnull @callee()
    // }
    // Here we cannot add the nonnull attribute on either foo or bar. So, we
    // limit the check to both RetVal and RI are in the same basic block and
    // there are no throwing/exiting instructions between these instructions.
    if (RI->getParent() != RetVal->getParent() ||
        MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
      continue;
    // Add to the existing attributes of NewRetVal, i.e. the cloned call
    // instruction.
    // NB! When we have the same attribute already existing on NewRetVal, but
    // with a differing value, the AttributeList's merge API honours the already
    // existing attribute value (i.e. attributes such as dereferenceable,
    // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
    AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
    AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
    AttributeList AL = NewRetVal->getAttributes();
    // Drop the call site's byte counts when the clone already carries a
    // larger one.
    if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
      ValidUB.removeAttribute(Val: Attribute::Dereferenceable);
    if (ValidUB.getDereferenceableOrNullBytes() <
        AL.getRetDereferenceableOrNullBytes())
      ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull);
    AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
    // Attributes that may generate poison returns are a bit tricky. If we
    // propagate them, other uses of the callsite might have their behavior
    // change or cause UB (if they have noundef) b.c of the new potential
    // poison.
    // Take the following three cases:
    //
    // 1)
    // define nonnull ptr @foo() {
    //   %p = call ptr @bar()
    //   call void @use(ptr %p) willreturn nounwind
    //   ret ptr %p
    // }
    //
    // 2)
    // define noundef nonnull ptr @foo() {
    //   %p = call ptr @bar()
    //   call void @use(ptr %p) willreturn nounwind
    //   ret ptr %p
    // }
    //
    // 3)
    // define nonnull ptr @foo() {
    //   %p = call noundef ptr @bar()
    //   ret ptr %p
    // }
    //
    // In case 1, we can't propagate nonnull because poison value in @use may
    // change behavior or trigger UB.
    // In case 2, we don't need to be concerned about propagating nonnull, as
    // any new poison at @use will trigger UB anyways.
    // In case 3, we can never propagate nonnull because it may create UB due to
    // the noundef on @bar.
    // Keep the clone's alignment when it is already the larger one.
    if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
      ValidPG.removeAttribute(Val: Attribute::Alignment);
    if (ValidPG.hasAttributes()) {
      // If both the call site and the clone carry a range, propagate the
      // intersection of the two.
      Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range);
      if (CBRange.isValid()) {
        Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range);
        if (NewRange.isValid()) {
          ValidPG.addRangeAttr(
              CR: CBRange.getRange().intersectWith(CR: NewRange.getRange()));
        }
      }

      // nofpclass masks from both sides are combined with bitwise OR.
      Attribute CBNoFPClass = ValidPG.getAttribute(Kind: Attribute::NoFPClass);
      if (CBNoFPClass.isValid() && AL.hasRetAttr(Kind: Attribute::NoFPClass)) {
        ValidPG.addNoFPClassAttr(
            NoFPClassMask: CBNoFPClass.getNoFPClass() |
            AL.getRetAttr(Kind: Attribute::NoFPClass).getNoFPClass());
      }

      // Three checks.
      // If the callsite has `noundef`, then a poison due to violating the
      // return attribute will create UB anyways so we can always propagate.
      // Otherwise, if the return value (callee to be inlined) has `noundef`, we
      // can't propagate as a new poison return will cause UB.
      // Finally, check if the return value has no uses whose behavior may
      // change/may cause UB if we potentially return poison. At the moment this
      // is implemented overly conservatively with a single-use check.
      // TODO: Update the single-use check to iterate through uses and only bail
      // if we have a potentially dangerous use.

      if (CB.hasRetAttr(Kind: Attribute::NoUndef) ||
          (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef)))
        NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
    }
    NewRetVal->setAttributes(NewAL);
  }
}
1782
1783/// If the inlined function has non-byval align arguments, then
1784/// add @llvm.assume-based alignment assumptions to preserve this information.
1785static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1786 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1787 return;
1788
1789 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1790 auto &DL = CB.getDataLayout();
1791
1792 // To avoid inserting redundant assumptions, we should check for assumptions
1793 // already in the caller. To do this, we might need a DT of the caller.
1794 DominatorTree DT;
1795 bool DTCalculated = false;
1796
1797 Function *CalledFunc = CB.getCalledFunction();
1798 for (Argument &Arg : CalledFunc->args()) {
1799 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1800 Arg.use_empty())
1801 continue;
1802 MaybeAlign Alignment = Arg.getParamAlign();
1803 if (!Alignment)
1804 continue;
1805
1806 if (!DTCalculated) {
1807 DT.recalculate(Func&: *CB.getCaller());
1808 DTCalculated = true;
1809 }
1810 // If we can already prove the asserted alignment in the context of the
1811 // caller, then don't bother inserting the assumption.
1812 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1813 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1814 continue;
1815
1816 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1817 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1818 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1819 }
1820}
1821
1822static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1823 MaybeAlign SrcAlign, Module *M,
1824 BasicBlock *InsertBlock,
1825 InlineFunctionInfo &IFI,
1826 Function *CalledFunc) {
1827 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1828
1829 Value *Size =
1830 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1831
1832 Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout());
1833
1834 // Generate a memcpy with the correct alignments.
1835 CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
1836
1837 // The verifier requires that all calls of debug-info-bearing functions
1838 // from debug-info-bearing functions have a debug location (for inlining
1839 // purposes). Assign a dummy location to satisfy the constraint.
1840 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1841 if (DISubprogram *SP = CalledFunc->getSubprogram())
1842 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1843}
1844
1845/// When inlining a call site that has a byval argument,
1846/// we have to make the implicit memcpy explicit by adding it.
1847static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1848 Instruction *TheCall,
1849 const Function *CalledFunc,
1850 InlineFunctionInfo &IFI,
1851 MaybeAlign ByValAlignment) {
1852 Function *Caller = TheCall->getFunction();
1853 const DataLayout &DL = Caller->getDataLayout();
1854
1855 // If the called function is readonly, then it could not mutate the caller's
1856 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1857 // temporary.
1858 if (CalledFunc->onlyReadsMemory()) {
1859 // If the byval argument has a specified alignment that is greater than the
1860 // passed in pointer, then we either have to round up the input pointer or
1861 // give up on this transformation.
1862 if (ByValAlignment.valueOrOne() == 1)
1863 return Arg;
1864
1865 AssumptionCache *AC =
1866 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1867
1868 // If the pointer is already known to be sufficiently aligned, or if we can
1869 // round it up to a larger alignment, then we don't need a temporary.
1870 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1871 *ByValAlignment)
1872 return Arg;
1873
1874 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1875 // for code quality, but rarely happens and is required for correctness.
1876 }
1877
1878 // Create the alloca. If we have DataLayout, use nice alignment.
1879 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1880
1881 // If the byval had an alignment specified, we *must* use at least that
1882 // alignment, as it is required by the byval argument (and uses of the
1883 // pointer inside the callee).
1884 if (ByValAlignment)
1885 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1886
1887 AllocaInst *NewAlloca =
1888 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1889 nullptr, Alignment, Arg->getName());
1890 NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated());
1891 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1892 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1893
1894 // Uses of the argument in the function should use our new alloca
1895 // instead.
1896 return NewAlloca;
1897}
1898
1899// Check whether this Value is used by a lifetime intrinsic.
1900static bool isUsedByLifetimeMarker(Value *V) {
1901 for (User *U : V->users())
1902 if (isa<LifetimeIntrinsic>(Val: U))
1903 return true;
1904 return false;
1905}
1906
1907// Check whether the given alloca already has
1908// lifetime.start or lifetime.end intrinsics.
1909static bool hasLifetimeMarkers(AllocaInst *AI) {
1910 Type *Ty = AI->getType();
1911 Type *Int8PtrTy =
1912 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1913 if (Ty == Int8PtrTy)
1914 return isUsedByLifetimeMarker(V: AI);
1915
1916 // Do a scan to find all the casts to i8*.
1917 for (User *U : AI->users()) {
1918 if (U->getType() != Int8PtrTy) continue;
1919 if (U->stripPointerCasts() != AI) continue;
1920 if (isUsedByLifetimeMarker(V: U))
1921 return true;
1922 }
1923 return false;
1924}
1925
1926/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1927/// block. Allocas used in inalloca calls and allocas of dynamic array size
1928/// cannot be static.
1929static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
1930 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1931}
1932
1933/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1934/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1935static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1936 LLVMContext &Ctx,
1937 DenseMap<const MDNode *, MDNode *> &IANodes) {
1938 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1939 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1940 Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(),
1941 AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank());
1942}
1943
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
///
/// Processes every block of \p Fn from \p FI to the end of the function.
/// \p TheCall is the inlined call site; if it has no debug location nothing
/// is done. \p CalleeHasDebugInfo decides whether instructions without a
/// location are left alone or given the call-site location.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall, bool CalleeHasDebugInfo) {
  if (!TheCall->getDebugLoc())
    return;

  // Don't propagate the source location atom from the call to inlined nodebug
  // instructions, and avoid putting it in the InlinedAt field of inlined
  // not-nodebug instructions. FIXME: Possibly worth transferring/generating
  // an atom for the returned value, otherwise we miss stepping on inlined
  // nodebug functions (which is different to existing behaviour).
  DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom();

  auto &Ctx = Fn->getContext();
  DILocation *InlinedAtNode = TheCallDL;

  // Create a unique call site, not to be confused with any other call from the
  // same location.
  InlinedAtNode = DILocation::getDistinct(
      Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
      Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());

  // Cache the inlined-at nodes as they're built so they are reused, without
  // this every instruction's inlined-at chain would become distinct from each
  // other.
  DenseMap<const MDNode *, MDNode *> IANodes;

  // Check if we are not generating inline line tables and want to use
  // the call site location instead.
  bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");

  // Helper-util for updating the metadata attached to an instruction.
  auto UpdateInst = [&](Instruction &I) {
    // Loop metadata needs to be updated so that the start and end locs
    // reference inlined-at locations.
    auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
                              &IANodes](Metadata *MD) -> Metadata * {
      if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
        return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
      return MD;
    };
    updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);

    // Normal case: an instruction that has a location gets that location
    // re-rooted at the distinct inlined-at node, and we are done with it.
    if (!NoInlineLineTables)
      if (DebugLoc DL = I.getDebugLoc()) {
        DebugLoc IDL =
            inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
        I.setDebugLoc(IDL);
        return;
      }

    // Reached with inline line tables enabled only when the instruction has
    // no location; leave it without one if the callee has debug info.
    if (CalleeHasDebugInfo && !NoInlineLineTables)
      return;

    // If the inlined instruction has no line number, or if inline info
    // is not being generated, make it look as if it originates from the call
    // location. This is important for ((__always_inline, __nodebug__))
    // functions which must use caller location for all instructions in their
    // function body.

    // Don't update static allocas, as they may get moved later.
    if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
      if (allocaWouldBeStaticInEntry(AI))
        return;

    // Do not force a debug loc for pseudo probes, since they do not need to
    // be debuggable, and also they are expected to have a zero/null dwarf
    // discriminator at this point which could be violated otherwise.
    if (isa<PseudoProbeInst>(Val: I))
      return;

    I.setDebugLoc(TheCallDL);
  };

  // Helper-util for updating debug-info records attached to instructions.
  auto UpdateDVR = [&](DbgRecord *DVR) {
    assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
    // Without inline line tables the record simply takes the call-site
    // location.
    if (NoInlineLineTables) {
      DVR->setDebugLoc(TheCallDL);
      return;
    }
    DebugLoc DL = DVR->getDebugLoc();
    DebugLoc IDL =
        inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
                       Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
    DVR->setDebugLoc(IDL);
  };

  // Iterate over all instructions, updating metadata and debug-info records.
  for (; FI != Fn->end(); ++FI) {
    for (Instruction &I : *FI) {
      UpdateInst(I);
      for (DbgRecord &DVR : I.getDbgRecordRange()) {
        UpdateDVR(&DVR);
      }
    }

    // Remove debug info records if we're not keeping inline info.
    if (NoInlineLineTables) {
      BasicBlock::iterator BI = FI->begin();
      while (BI != FI->end()) {
        BI->dropDbgRecords();
        ++BI;
      }
    }
  }
}
2052
2053#undef DEBUG_TYPE
2054#define DEBUG_TYPE "assignment-tracking"
2055/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
2056static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
2057 const CallBase &CB) {
2058 at::StorageToVarsMap EscapedLocals;
2059 SmallPtrSet<const Value *, 4> SeenBases;
2060
2061 LLVM_DEBUG(
2062 errs() << "# Finding caller local variables escaped by callee\n");
2063 for (const Value *Arg : CB.args()) {
2064 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
2065 if (!Arg->getType()->isPointerTy()) {
2066 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
2067 continue;
2068 }
2069
2070 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
2071 if (!I) {
2072 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
2073 continue;
2074 }
2075
2076 // Walk back to the base storage.
2077 assert(Arg->getType()->isPtrOrPtrVectorTy());
2078 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
2079 const AllocaInst *Base = dyn_cast<AllocaInst>(
2080 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
2081 if (!Base) {
2082 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
2083 continue;
2084 }
2085
2086 assert(Base);
2087 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
2088 // We only need to process each base address once - skip any duplicates.
2089 if (!SeenBases.insert(Ptr: Base).second)
2090 continue;
2091
2092 // Find all local variables associated with the backing storage.
2093 auto CollectAssignsForStorage = [&](DbgVariableRecord *DbgAssign) {
2094 // Skip variables from inlined functions - they are not local variables.
2095 if (DbgAssign->getDebugLoc().getInlinedAt())
2096 return;
2097 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
2098 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
2099 };
2100 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
2101 }
2102 return EscapedLocals;
2103}
2104
2105static void trackInlinedStores(Function::iterator Start, Function::iterator End,
2106 const CallBase &CB) {
2107 LLVM_DEBUG(errs() << "trackInlinedStores into "
2108 << Start->getParent()->getName() << " from "
2109 << CB.getCalledFunction()->getName() << "\n");
2110 const DataLayout &DL = CB.getDataLayout();
2111 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL);
2112}
2113
2114/// Update inlined instructions' DIAssignID metadata. We need to do this
2115/// otherwise a function inlined more than once into the same function
2116/// will cause DIAssignID to be shared by many instructions.
2117static void fixupAssignments(Function::iterator Start, Function::iterator End) {
2118 DenseMap<DIAssignID *, DIAssignID *> Map;
2119 // Loop over all the inlined instructions. If we find a DIAssignID
2120 // attachment or use, replace it with a new version.
2121 for (auto BBI = Start; BBI != End; ++BBI) {
2122 for (Instruction &I : *BBI)
2123 at::remapAssignID(Map, I);
2124 }
2125}
2126#undef DEBUG_TYPE
2127#define DEBUG_TYPE "inline-function"
2128
2129/// Update the block frequencies of the caller after a callee has been inlined.
2130///
2131/// Each block cloned into the caller has its block frequency scaled by the
2132/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
2133/// callee's entry block gets the same frequency as the callsite block and the
2134/// relative frequencies of all cloned blocks remain the same after cloning.
2135static void updateCallerBFI(BasicBlock *CallSiteBlock,
2136 const ValueToValueMapTy &VMap,
2137 BlockFrequencyInfo *CallerBFI,
2138 BlockFrequencyInfo *CalleeBFI,
2139 const BasicBlock &CalleeEntryBlock) {
2140 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
2141 for (auto Entry : VMap) {
2142 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
2143 continue;
2144 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
2145 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
2146 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
2147 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
2148 // Multiple blocks in the callee might get mapped to one cloned block in
2149 // the caller since we prune the callee as we clone it. When that happens,
2150 // we want to use the maximum among the original blocks' frequencies.
2151 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
2152 if (NewFreq > Freq)
2153 Freq = NewFreq;
2154 }
2155 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
2156 }
2157 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
2158 CallerBFI->setBlockFreqAndScale(
2159 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
2160}
2161
2162/// Update the branch metadata for cloned call instructions.
2163static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
2164 const uint64_t &CalleeEntryCount,
2165 const CallBase &TheCall, ProfileSummaryInfo *PSI,
2166 BlockFrequencyInfo *CallerBFI) {
2167 if (CalleeEntryCount < 1)
2168 return;
2169 auto CallSiteCount =
2170 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
2171 int64_t CallCount = std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount);
2172 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
2173}
2174
2175void llvm::updateProfileCallee(
2176 Function *Callee, int64_t EntryDelta,
2177 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
2178 auto CalleeCount = Callee->getEntryCount();
2179 if (!CalleeCount)
2180 return;
2181
2182 // Since CallSiteCount is an estimate, it could exceed the original callee
2183 // count and has to be set to 0 so guard against underflow.
2184 const uint64_t NewEntryCount =
2185 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > *CalleeCount)
2186 ? 0
2187 : *CalleeCount + EntryDelta;
2188
2189 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
2190 const uint64_t PriorEntryCount) {
2191 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
2192 if (VPtr)
2193 scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount);
2194 };
2195
2196 // During inlining ?
2197 if (VMap) {
2198 uint64_t CloneEntryCount = *CalleeCount - NewEntryCount;
2199 for (auto Entry : *VMap) {
2200 if (isa<CallInst>(Val: Entry.first))
2201 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) {
2202 CI->updateProfWeight(S: CloneEntryCount, T: *CalleeCount);
2203 updateVTableProfWeight(CI, CloneEntryCount, *CalleeCount);
2204 }
2205
2206 if (isa<InvokeInst>(Val: Entry.first))
2207 if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) {
2208 II->updateProfWeight(S: CloneEntryCount, T: *CalleeCount);
2209 updateVTableProfWeight(II, CloneEntryCount, *CalleeCount);
2210 }
2211 }
2212 }
2213
2214 if (EntryDelta) {
2215 Callee->setEntryCount(Count: NewEntryCount);
2216
2217 for (BasicBlock &BB : *Callee)
2218 // No need to update the callsite if it is pruned during inlining.
2219 if (!VMap || VMap->count(Val: &BB))
2220 for (Instruction &I : BB) {
2221 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2222 CI->updateProfWeight(S: NewEntryCount, T: *CalleeCount);
2223 updateVTableProfWeight(CI, NewEntryCount, *CalleeCount);
2224 }
2225 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) {
2226 II->updateProfWeight(S: NewEntryCount, T: *CalleeCount);
2227 updateVTableProfWeight(II, NewEntryCount, *CalleeCount);
2228 }
2229 }
2230 }
2231}
2232
2233/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2234/// result is implicitly consumed by a call to retainRV or claimRV immediately
2235/// after the call. This function inlines the retainRV/claimRV calls.
2236///
2237/// There are three cases to consider:
2238///
2239/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2240/// object in the callee return block, the autoreleaseRV call and the
2241/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2242/// is a claimRV call, a call to objc_release is emitted.
2243///
2244/// 2. If there is a call in the callee return block that doesn't have operand
2245/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2246/// is transferred to the call in the callee.
2247///
2248/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2249/// a retainRV call.
2250static void
2251inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2252 const SmallVectorImpl<ReturnInst *> &Returns) {
2253 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2254 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2255 IsUnsafeClaimRV = !IsRetainRV;
2256
2257 for (auto *RI : Returns) {
2258 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
2259 bool InsertRetainCall = IsRetainRV;
2260 IRBuilder<> Builder(RI->getContext());
2261
2262 // Walk backwards through the basic block looking for either a matching
2263 // autoreleaseRV call or an unannotated call.
2264 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
2265 y: RI->getParent()->rend());
2266 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
2267 // Ignore casts.
2268 if (isa<CastInst>(Val: I))
2269 continue;
2270
2271 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
2272 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2273 !II->use_empty() ||
2274 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
2275 break;
2276
2277 // If we've found a matching authoreleaseRV call:
2278 // - If claimRV is attached to the call, insert a call to objc_release
2279 // and erase the autoreleaseRV call.
2280 // - If retainRV is attached to the call, just erase the autoreleaseRV
2281 // call.
2282 if (IsUnsafeClaimRV) {
2283 Builder.SetInsertPoint(II);
2284 Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd);
2285 }
2286 II->eraseFromParent();
2287 InsertRetainCall = false;
2288 break;
2289 }
2290
2291 auto *CI = dyn_cast<CallInst>(Val: &I);
2292
2293 if (!CI)
2294 break;
2295
2296 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
2297 objcarc::hasAttachedCallOpBundle(CB: CI))
2298 break;
2299
2300 // If we've found an unannotated call that defines RetOpnd, add a
2301 // "clang.arc.attachedcall" operand bundle.
2302 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2303 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2304 auto *NewCall = CallBase::addOperandBundle(
2305 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2306 NewCall->copyMetadata(SrcInst: *CI);
2307 CI->replaceAllUsesWith(V: NewCall);
2308 CI->eraseFromParent();
2309 InsertRetainCall = false;
2310 break;
2311 }
2312
2313 if (InsertRetainCall) {
2314 // The retainRV is attached to the call and we've failed to find a
2315 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2316 // to objc_retain.
2317 Builder.SetInsertPoint(RI);
2318 Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd);
2319 }
2320 }
2321}
2322
2323// In contextual profiling, when an inline succeeds, we want to remap the
2324// indices of the callee into the index space of the caller. We can't just leave
2325// them as-is because the same callee may appear in other places in this caller
2326// (other callsites), and its (callee's) counters and sub-contextual profile
2327// tree would be potentially different.
2328// Not all BBs of the callee may survive the opportunistic DCE InlineFunction
2329// does (same goes for callsites in the callee).
2330// We will return a pair of vectors, one for basic block IDs and one for
2331// callsites. For such a vector V, V[Idx] will be -1 if the callee
2332// instrumentation with index Idx did not survive inlining, and a new value
2333// otherwise.
2334// This function will update the caller's instrumentation intrinsics
2335// accordingly, mapping indices as described above. We also replace the "name"
2336// operand because we use it to distinguish between "own" instrumentation and
2337// "from callee" instrumentation when performing the traversal of the CFG of the
2338// caller. We traverse depth-first from the callsite's BB and up to the point we
2339// hit BBs owned by the caller.
2340// The return values will be then used to update the contextual
2341// profile. Note: we only update the "name" and "index" operands in the
2342// instrumentation intrinsics, we leave the hash and total nr of indices as-is,
2343// it's not worth updating those.
2344static std::pair<std::vector<int64_t>, std::vector<int64_t>>
2345remapIndices(Function &Caller, BasicBlock *StartBB,
2346 PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
2347 uint32_t CalleeCallsites) {
2348 // We'll allocate a new ID to imported callsite counters and callsites. We're
2349 // using -1 to indicate a counter we delete. Most likely the entry ID, for
2350 // example, will be deleted - we don't want 2 IDs in the same BB, and the
2351 // entry would have been cloned in the callsite's old BB.
2352 std::vector<int64_t> CalleeCounterMap;
2353 std::vector<int64_t> CalleeCallsiteMap;
2354 CalleeCounterMap.resize(new_size: CalleeCounters, x: -1);
2355 CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1);
2356
2357 auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
2358 if (Ins.getNameValue() == &Caller)
2359 return false;
2360 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2361 if (CalleeCounterMap[OldID] == -1)
2362 CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller);
2363 const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);
2364
2365 Ins.setNameValue(&Caller);
2366 Ins.setIndex(NewID);
2367 return true;
2368 };
2369
2370 auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool {
2371 if (Ins.getNameValue() == &Caller)
2372 return false;
2373 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2374 if (CalleeCallsiteMap[OldID] == -1)
2375 CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller);
2376 const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);
2377
2378 Ins.setNameValue(&Caller);
2379 Ins.setIndex(NewID);
2380 return true;
2381 };
2382
2383 std::deque<BasicBlock *> Worklist;
2384 DenseSet<const BasicBlock *> Seen;
2385 // We will traverse the BBs starting from the callsite BB. The callsite BB
2386 // will have at least a BB ID - maybe its own, and in any case the one coming
2387 // from the cloned function's entry BB. The other BBs we'll start seeing from
2388 // there on may or may not have BB IDs. BBs with IDs belonging to our caller
2389 // are definitely not coming from the imported function and form a boundary
2390 // past which we don't need to traverse anymore. BBs may have no
2391 // instrumentation (because we originally inserted instrumentation as per
2392 // MST), in which case we'll traverse past them. An invariant we'll keep is
2393 // that a BB will have at most 1 BB ID. For example, in the callsite BB, we
2394 // will delete the callee BB's instrumentation. This doesn't result in
2395 // information loss: the entry BB of the callee will have the same count as
2396 // the callsite's BB. At the end of this traversal, all the callee's
2397 // instrumentation would be mapped into the caller's instrumentation index
2398 // space. Some of the callee's counters may be deleted (as mentioned, this
2399 // should result in no loss of information).
2400 Worklist.push_back(x: StartBB);
2401 while (!Worklist.empty()) {
2402 auto *BB = Worklist.front();
2403 Worklist.pop_front();
2404 bool Changed = false;
2405 auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB);
2406 if (BBID) {
2407 Changed |= RewriteInstrIfNeeded(*BBID);
2408 // this may be the entryblock from the inlined callee, coming into a BB
2409 // that didn't have instrumentation because of MST decisions. Let's make
2410 // sure it's placed accordingly. This is a noop elsewhere.
2411 BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
2412 }
2413 for (auto &I : llvm::make_early_inc_range(Range&: *BB)) {
2414 if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) {
2415 if (isa<InstrProfIncrementInstStep>(Val: Inc)) {
2416 // Step instrumentation is used for select instructions. Inlining may
2417 // have propagated a constant resulting in the condition of the select
2418 // being resolved, case in which function cloning resolves the value
2419 // of the select, and elides the select instruction. If that is the
2420 // case, the step parameter of the instrumentation will reflect that.
2421 // We can delete the instrumentation in that case.
2422 if (isa<Constant>(Val: Inc->getStep())) {
2423 assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
2424 Inc->eraseFromParent();
2425 } else {
2426 assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
2427 RewriteInstrIfNeeded(*Inc);
2428 }
2429 } else if (Inc != BBID) {
2430 // If we're here it means that the BB had more than 1 IDs, presumably
2431 // some coming from the callee. We "made up our mind" to keep the
2432 // first one (which may or may not have been originally the caller's).
2433 // All the others are superfluous and we delete them.
2434 Inc->eraseFromParent();
2435 Changed = true;
2436 }
2437 } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) {
2438 Changed |= RewriteCallsiteInsIfNeeded(*CS);
2439 }
2440 }
2441 if (!BBID || Changed)
2442 for (auto *Succ : successors(BB))
2443 if (Seen.insert(V: Succ).second)
2444 Worklist.push_back(x: Succ);
2445 }
2446
2447 assert(!llvm::is_contained(CalleeCounterMap, 0) &&
2448 "Counter index mapping should be either to -1 or to non-zero index, "
2449 "because the 0 "
2450 "index corresponds to the entry BB of the caller");
2451 assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
2452 "Callsite index mapping should be either to -1 or to non-zero index, "
2453 "because there should have been at least a callsite - the inlined one "
2454 "- which would have had a 0 index.");
2455
2456 return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
2457}
2458
2459// Inline. If successful, update the contextual profile (if a valid one is
2460// given).
2461// The contextual profile data is organized in trees, as follows:
2462// - each node corresponds to a function
2463// - the root of each tree corresponds to an "entrypoint" - e.g.
2464// RPC handler for server side
2465// - the path from the root to a node is a particular call path
2466// - the counters stored in a node are counter values observed in that
2467// particular call path ("context")
2468// - the edges between nodes are annotated with callsite IDs.
2469//
2470// Updating the contextual profile after an inlining means, at a high level,
2471// copying over the data of the callee, **intentionally without any value
2472// scaling**, and copying over the callees of the inlined callee.
2473llvm::InlineResult
2474llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2475 PGOContextualProfile &CtxProf, bool MergeAttributes,
2476 AAResults *CalleeAAR, bool InsertLifetime,
2477 bool TrackInlineHistory, Function *ForwardVarArgsTo,
2478 OptimizationRemarkEmitter *ORE) {
2479 if (!CtxProf.isInSpecializedModule())
2480 return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2481 TrackInlineHistory, ForwardVarArgsTo, ORE);
2482
2483 auto &Caller = *CB.getCaller();
2484 auto &Callee = *CB.getCalledFunction();
2485 auto *StartBB = CB.getParent();
2486
2487 // Get some preliminary data about the callsite before it might get inlined.
2488 // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to
2489 // get this data upfront and rely less on InlineFunction's behavior.
2490 const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee);
2491 auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
2492 const auto CallsiteID =
2493 static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());
2494
2495 const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee);
2496 const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee);
2497
2498 auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2499 TrackInlineHistory, ForwardVarArgsTo, ORE);
2500 if (!Ret.isSuccess())
2501 return Ret;
2502
2503 // Inlining succeeded, we don't need the instrumentation of the inlined
2504 // callsite.
2505 CallsiteIDIns->eraseFromParent();
2506
2507 // Assinging Maps and then capturing references into it in the lambda because
2508 // captured structured bindings are a C++20 extension. We do also need a
2509 // capture here, though.
2510 const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf,
2511 CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites);
2512 const uint32_t NewCountersSize = CtxProf.getNumCounters(F: Caller);
2513
2514 auto Updater = [&](PGOCtxProfContext &Ctx) {
2515 assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
2516 const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps;
2517 assert(
2518 (Ctx.counters().size() +
2519 llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
2520 NewCountersSize) &&
2521 "The caller's counters size should have grown by the number of new "
2522 "distinct counters inherited from the inlined callee.");
2523 Ctx.resizeCounters(Size: NewCountersSize);
2524 // If the callsite wasn't exercised in this context, the value of the
2525 // counters coming from it is 0 - which it is right now, after resizing them
2526 // - and so we're done.
2527 auto CSIt = Ctx.callsites().find(x: CallsiteID);
2528 if (CSIt == Ctx.callsites().end())
2529 return;
2530 auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID);
2531 // The callsite was exercised, but not with this callee (so presumably this
2532 // is an indirect callsite). Again, we're done here.
2533 if (CalleeCtxIt == CSIt->second.end())
2534 return;
2535
2536 // Let's pull in the counter values and the subcontexts coming from the
2537 // inlined callee.
2538 auto &CalleeCtx = CalleeCtxIt->second;
2539 assert(CalleeCtx.guid() == CalleeGUID);
2540
2541 for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
2542 const int64_t NewIndex = CalleeCounterMap[I];
2543 if (NewIndex >= 0) {
2544 assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 "
2545 "index, that's the caller's entry BB");
2546 Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
2547 }
2548 }
2549 for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
2550 const int64_t NewCSIdx = CalleeCallsiteMap[I];
2551 if (NewCSIdx >= 0) {
2552 assert(NewCSIdx != 0 &&
2553 "callsite index mapping shouldn't happen to a 0 index, the "
2554 "caller must've had at least one callsite (with such an index)");
2555 Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet));
2556 }
2557 }
2558 // We know the traversal is preorder, so it wouldn't have yet looked at the
2559 // sub-contexts of this context that it's currently visiting. Meaning, the
2560 // erase below invalidates no iterators.
2561 auto Deleted = Ctx.callsites().erase(x: CallsiteID);
2562 assert(Deleted);
2563 (void)Deleted;
2564 };
2565 CtxProf.update(Updater, F: Caller);
2566 return Ret;
2567}
2568
2569llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB,
2570 InlineFunctionInfo &IFI) {
2571 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2572
2573 // FIXME: we don't inline callbr yet.
2574 if (isa<CallBrInst>(Val: CB))
2575 return InlineResult::failure(Reason: "We don't inline callbr yet.");
2576
2577 // If IFI has any state in it, zap it before we fill it in.
2578 IFI.reset();
2579
2580 Function *CalledFunc = CB.getCalledFunction();
2581 if (!CalledFunc || // Can't inline external function or indirect
2582 CalledFunc->isDeclaration()) // call!
2583 return InlineResult::failure(Reason: "external or indirect");
2584
2585 // Don't inline if we've already inlined this callee through this call site
2586 // before to prevent infinite inlining through mutually recursive functions.
2587 if (MDNode *InlineHistory = CB.getMetadata(KindID: LLVMContext::MD_inline_history)) {
2588 for (const auto &Op : InlineHistory->operands()) {
2589 if (auto *MD = dyn_cast_or_null<ValueAsMetadata>(Val: Op)) {
2590 if (MD->getValue() == CalledFunc) {
2591 return InlineResult::failure(Reason: "inline history");
2592 }
2593 }
2594 }
2595 }
2596
2597 // The inliner does not know how to inline through calls with operand bundles
2598 // in general ...
2599 if (CB.hasOperandBundles()) {
2600 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2601 auto OBUse = CB.getOperandBundleAt(Index: i);
2602 uint32_t Tag = OBUse.getTagID();
2603 // ... but it knows how to inline through "deopt" operand bundles ...
2604 if (Tag == LLVMContext::OB_deopt)
2605 continue;
2606 // ... and "funclet" operand bundles.
2607 if (Tag == LLVMContext::OB_funclet)
2608 continue;
2609 if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2610 continue;
2611 if (Tag == LLVMContext::OB_kcfi)
2612 continue;
2613 if (Tag == LLVMContext::OB_convergencectrl) {
2614 IFI.ConvergenceControlToken = OBUse.Inputs[0].get();
2615 continue;
2616 }
2617
2618 return InlineResult::failure(Reason: "unsupported operand bundle");
2619 }
2620 }
2621
2622 // FIXME: The check below is redundant and incomplete. According to spec, if a
2623 // convergent call is missing a token, then the caller is using uncontrolled
2624 // convergence. If the callee has an entry intrinsic, then the callee is using
2625 // controlled convergence, and the call cannot be inlined. A proper
2626 // implemenation of this check requires a whole new analysis that identifies
2627 // convergence in every function. For now, we skip that and just do this one
2628 // cursory check. The underlying assumption is that in a compiler flow that
2629 // fully implements convergence control tokens, there is no mixing of
2630 // controlled and uncontrolled convergent operations in the whole program.
2631 if (CB.isConvergent()) {
2632 if (!IFI.ConvergenceControlToken &&
2633 getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) {
2634 return InlineResult::failure(
2635 Reason: "convergent call needs convergencectrl operand");
2636 }
2637 }
2638
2639 const BasicBlock *OrigBB = CB.getParent();
2640 const Function *Caller = OrigBB->getParent();
2641
2642 // GC poses two hazards to inlining, which only occur when the callee has GC:
2643 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2644 // caller.
2645 // 2. If the caller has a differing GC, it is invalid to inline.
2646 if (CalledFunc->hasGC()) {
2647 if (Caller->hasGC() && CalledFunc->getGC() != Caller->getGC())
2648 return InlineResult::failure(Reason: "incompatible GC");
2649 }
2650
2651 // Get the personality function from the callee if it contains a landing pad.
2652 Constant *CalledPersonality =
2653 CalledFunc->hasPersonalityFn()
2654 ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2655 : nullptr;
2656
2657 // Find the personality function used by the landing pads of the caller. If it
2658 // exists, then check to see that it matches the personality function used in
2659 // the callee.
2660 Constant *CallerPersonality =
2661 Caller->hasPersonalityFn()
2662 ? Caller->getPersonalityFn()->stripPointerCasts()
2663 : nullptr;
2664 if (CalledPersonality) {
2665 // If the personality functions match, then we can perform the
2666 // inlining. Otherwise, we can't inline.
2667 // TODO: This isn't 100% true. Some personality functions are proper
2668 // supersets of others and can be used in place of the other.
2669 if (CallerPersonality && CalledPersonality != CallerPersonality)
2670 return InlineResult::failure(Reason: "incompatible personality");
2671 }
2672
2673 // We need to figure out which funclet the callsite was in so that we may
2674 // properly nest the callee.
2675 if (CallerPersonality) {
2676 EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
2677 if (isScopedEHPersonality(Pers: Personality)) {
2678 std::optional<OperandBundleUse> ParentFunclet =
2679 CB.getOperandBundle(ID: LLVMContext::OB_funclet);
2680 if (ParentFunclet)
2681 IFI.CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());
2682
2683 // OK, the inlining site is legal. What about the target function?
2684
2685 if (IFI.CallSiteEHPad) {
2686 if (Personality == EHPersonality::MSVC_CXX) {
2687 // The MSVC personality cannot tolerate catches getting inlined into
2688 // cleanup funclets.
2689 if (isa<CleanupPadInst>(Val: IFI.CallSiteEHPad)) {
2690 // Ok, the call site is within a cleanuppad. Let's check the callee
2691 // for catchpads.
2692 for (const BasicBlock &CalledBB : *CalledFunc) {
2693 if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt()))
2694 return InlineResult::failure(Reason: "catch in cleanup funclet");
2695 }
2696 }
2697 } else if (isAsynchronousEHPersonality(Pers: Personality)) {
2698 // SEH is even less tolerant, there may not be any sort of exceptional
2699 // funclet in the callee.
2700 for (const BasicBlock &CalledBB : *CalledFunc) {
2701 if (CalledBB.isEHPad())
2702 return InlineResult::failure(Reason: "SEH in cleanup funclet");
2703 }
2704 }
2705 }
2706 }
2707 }
2708
2709 return InlineResult::success();
2710}
2711
2712/// This function inlines the called function into the basic block of the
2713/// caller. This returns false if it is not possible to inline this call.
2714/// The program is still in a well defined state if this occurs though.
2715///
2716/// Note that this only does one level of inlining. For example, if the
2717/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2718/// exists in the instruction stream. Similarly this will inline a recursive
2719/// function by one level.
2720void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
2721 bool MergeAttributes, AAResults *CalleeAAR,
2722 bool InsertLifetime, bool TrackInlineHistory,
2723 Function *ForwardVarArgsTo,
2724 OptimizationRemarkEmitter *ORE) {
2725 BasicBlock *OrigBB = CB.getParent();
2726 Function *Caller = OrigBB->getParent();
2727 Function *CalledFunc = CB.getCalledFunction();
2728 assert(CalledFunc && !CalledFunc->isDeclaration() &&
2729 "CanInlineCallSite should have verified direct call to definition");
2730
2731 // Determine if we are dealing with a call in an EHPad which does not unwind
2732 // to caller.
2733 bool EHPadForCallUnwindsLocally = false;
2734 if (IFI.CallSiteEHPad && isa<CallInst>(Val: CB)) {
2735 UnwindDestMemoTy FuncletUnwindMap;
2736 Value *CallSiteUnwindDestToken =
2737 getUnwindDestToken(EHPad: IFI.CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2738
2739 EHPadForCallUnwindsLocally =
2740 CallSiteUnwindDestToken &&
2741 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2742 }
2743
2744 // Get an iterator to the last basic block in the function, which will have
2745 // the new function inlined after it.
2746 Function::iterator LastBlock = --Caller->end();
2747
2748 // Make sure to capture all of the return instructions from the cloned
2749 // function.
2750 SmallVector<ReturnInst*, 8> Returns;
2751 ClonedCodeInfo InlinedFunctionInfo;
2752 Function::iterator FirstNewBlock;
2753
2754 // GC poses two hazards to inlining, which only occur when the callee has GC:
2755 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2756 // caller.
2757 // 2. If the caller has a differing GC, it is invalid to inline.
2758 if (CalledFunc->hasGC()) {
2759 if (!Caller->hasGC())
2760 Caller->setGC(CalledFunc->getGC());
2761 else {
2762 assert(CalledFunc->getGC() == Caller->getGC() &&
2763 "CanInlineCallSite should have verified compatible GCs");
2764 }
2765 }
2766
2767 if (CalledFunc->hasPersonalityFn()) {
2768 Constant *CalledPersonality =
2769 CalledFunc->getPersonalityFn()->stripPointerCasts();
2770 if (!Caller->hasPersonalityFn()) {
2771 Caller->setPersonalityFn(CalledPersonality);
2772 } else
2773 assert(Caller->getPersonalityFn()->stripPointerCasts() ==
2774 CalledPersonality &&
2775 "CanInlineCallSite should have verified compatible personality");
2776 }
2777
2778 { // Scope to destroy VMap after cloning.
2779 ValueToValueMapTy VMap;
2780 struct ByValInit {
2781 Value *Dst;
2782 Value *Src;
2783 MaybeAlign SrcAlign;
2784 Type *Ty;
2785 };
2786 // Keep a list of tuples (dst, src, src_align) to emit byval
2787 // initializations. Src Alignment is only available though the callbase,
2788 // therefore has to be saved.
2789 SmallVector<ByValInit, 4> ByValInits;
2790
2791 // When inlining a function that contains noalias scope metadata,
2792 // this metadata needs to be cloned so that the inlined blocks
2793 // have different "unique scopes" at every call site.
2794 // Track the metadata that must be cloned. Do this before other changes to
2795 // the function, so that we do not get in trouble when inlining caller ==
2796 // callee.
2797 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2798
2799 auto &DL = Caller->getDataLayout();
2800
2801 // Calculate the vector of arguments to pass into the function cloner, which
2802 // matches up the formal to the actual argument values.
2803 auto AI = CB.arg_begin();
2804 unsigned ArgNo = 0;
2805 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2806 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2807 Value *ActualArg = *AI;
2808
2809 // When byval arguments actually inlined, we need to make the copy implied
2810 // by them explicit. However, we don't do this if the callee is readonly
2811 // or readnone, because the copy would be unneeded: the callee doesn't
2812 // modify the struct.
2813 if (CB.isByValArgument(ArgNo)) {
2814 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2815 TheCall: &CB, CalledFunc, IFI,
2816 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2817 if (ActualArg != *AI)
2818 ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI,
2819 .SrcAlign: CB.getParamAlign(ArgNo),
2820 .Ty: CB.getParamByValType(ArgNo)});
2821 }
2822
2823 VMap[&*I] = ActualArg;
2824 }
2825
2826 // TODO: Remove this when users have been updated to the assume bundles.
2827 // Add alignment assumptions if necessary. We do this before the inlined
2828 // instructions are actually cloned into the caller so that we can easily
2829 // check what will be known at the start of the inlined code.
2830 AddAlignmentAssumptions(CB, IFI);
2831
2832 AssumptionCache *AC =
2833 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2834
2835 /// Preserve all attributes on of the call and its parameters.
2836 salvageKnowledge(I: &CB, AC);
2837
2838 // We want the inliner to prune the code as it copies. We would LOVE to
2839 // have no dead or constant instructions leftover after inlining occurs
2840 // (which can happen, e.g., because an argument was constant), but we'll be
2841 // happy with whatever the cloner can do.
2842 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2843 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2844 CodeInfo&: InlinedFunctionInfo);
2845 // Remember the first block that is newly cloned over.
2846 FirstNewBlock = LastBlock; ++FirstNewBlock;
2847
2848 // Insert retainRV/clainRV runtime calls.
2849 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2850 if (RVCallKind != objcarc::ARCInstKind::None)
2851 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2852
2853 // Updated caller/callee profiles only when requested. For sample loader
2854 // inlining, the context-sensitive inlinee profile doesn't need to be
2855 // subtracted from callee profile, and the inlined clone also doesn't need
2856 // to be scaled based on call site count.
2857 if (IFI.UpdateProfile) {
2858 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2859 // Update the BFI of blocks cloned into the caller.
2860 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2861 CalleeEntryBlock: CalledFunc->front());
2862
2863 if (auto Profile = CalledFunc->getEntryCount())
2864 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2865 CallerBFI: IFI.CallerBFI);
2866 }
2867
2868 // Inject byval arguments initialization.
2869 for (ByValInit &Init : ByValInits)
2870 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign,
2871 M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI,
2872 CalledFunc);
2873
2874 std::optional<OperandBundleUse> ParentDeopt =
2875 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2876 if (ParentDeopt) {
2877 SmallVector<OperandBundleDef, 2> OpDefs;
2878
2879 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2880 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2881 if (!ICS)
2882 continue; // instruction was DCE'd or RAUW'ed to undef
2883
2884 OpDefs.clear();
2885
2886 OpDefs.reserve(N: ICS->getNumOperandBundles());
2887
2888 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2889 ++COBi) {
2890 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2891 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2892 // If the inlined call has other operand bundles, let them be
2893 OpDefs.emplace_back(Args&: ChildOB);
2894 continue;
2895 }
2896
2897 // It may be useful to separate this logic (of handling operand
2898 // bundles) out to a separate "policy" component if this gets crowded.
2899 // Prepend the parent's deoptimization continuation to the newly
2900 // inlined call's deoptimization continuation.
2901 std::vector<Value *> MergedDeoptArgs;
2902 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2903 ChildOB.Inputs.size());
2904
2905 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2906 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2907
2908 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2909 }
2910
2911 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2912
2913 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2914 // this even if the call returns void.
2915 ICS->replaceAllUsesWith(V: NewI);
2916
2917 VH = nullptr;
2918 ICS->eraseFromParent();
2919 }
2920 }
2921
2922 // For 'nodebug' functions, the associated DISubprogram is always null.
2923 // Conservatively avoid propagating the callsite debug location to
2924 // instructions inlined from a function whose DISubprogram is not null.
2925 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2926 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2927
2928 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2929 // Interpret inlined stores to caller-local variables as assignments.
2930 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2931
2932 // Update DIAssignID metadata attachments and uses so that they are
2933 // unique to this inlined instance.
2934 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2935 }
2936
2937 // Now clone the inlined noalias scope metadata.
2938 SAMetadataCloner.clone();
2939 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2940
2941 // Add noalias metadata if necessary.
2942 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2943
2944 // Clone return attributes on the callsite into the calls within the inlined
2945 // function which feed into its return value.
2946 AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2947
2948 // Clone attributes on the params of the callsite to calls within the
2949 // inlined function which use the same param.
2950 AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2951
2952 propagateMemProfMetadata(
2953 Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
2954
2955 // Propagate metadata on the callsite if necessary.
2956 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2957
2958 // Propagate an allocation wrapper's !alloc_token if necessary.
2959 propagateAllocTokenMetadata(CalledFunc, CB, VMap, InlinedFunctionInfo);
2960
2961 // Propagate implicit ref metadata.
2962 if (CalledFunc->hasMetadata(KindID: LLVMContext::MD_implicit_ref)) {
2963 SmallVector<MDNode *> MDs;
2964 CalledFunc->getMetadata(KindID: LLVMContext::MD_implicit_ref, MDs);
2965 for (MDNode *MD : MDs) {
2966 Caller->addMetadata(KindID: LLVMContext::MD_implicit_ref, MD&: *MD);
2967 }
2968 }
2969
2970 // Propagate inlined.from metadata for dontcall diagnostics.
2971 PropagateInlinedFromMetadata(CB, CalledFuncName: CalledFunc->getName(), CallerFuncName: Caller->getName(),
2972 FStart: FirstNewBlock, FEnd: Caller->end());
2973
2974 // Register any cloned assumptions.
2975 if (IFI.GetAssumptionCache)
2976 for (BasicBlock &NewBlock :
2977 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2978 for (Instruction &I : NewBlock)
2979 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2980 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2981 }
2982
2983 if (IFI.ConvergenceControlToken) {
2984 IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock);
2985 if (IntrinsicCall) {
2986 IntrinsicCall->replaceAllUsesWith(V: IFI.ConvergenceControlToken);
2987 IntrinsicCall->eraseFromParent();
2988 }
2989 }
2990
2991 // If there are any alloca instructions in the block that used to be the entry
2992 // block for the callee, move them to the entry block of the caller. First
2993 // calculate which instruction they should be inserted before. We insert the
2994 // instructions at the end of the current alloca list.
2995 {
2996 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2997 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2998 E = FirstNewBlock->end(); I != E; ) {
2999 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
3000 if (!AI) continue;
3001
3002 // If the alloca is now dead, remove it. This often occurs due to code
3003 // specialization.
3004 if (AI->use_empty()) {
3005 AI->eraseFromParent();
3006 continue;
3007 }
3008
3009 if (!allocaWouldBeStaticInEntry(AI))
3010 continue;
3011
3012 // Keep track of the static allocas that we inline into the caller.
3013 IFI.StaticAllocas.push_back(Elt: AI);
3014
3015 // Scan for the block of allocas that we can move over, and move them
3016 // all at once.
3017 while (isa<AllocaInst>(Val: I) &&
3018 !cast<AllocaInst>(Val&: I)->use_empty() &&
3019 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
3020 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
3021 ++I;
3022 }
3023
3024 // Transfer all of the allocas over in a block. Using splice means
3025 // that the instructions aren't removed from the symbol table, then
3026 // reinserted.
3027 I.setTailBit(true);
3028 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
3029 FromBeginIt: AI->getIterator(), FromEndIt: I);
3030 }
3031 }
3032
3033 // If the call to the callee cannot throw, set the 'nounwind' flag on any
3034 // calls that we inline.
3035 bool MarkNoUnwind = CB.doesNotThrow();
3036
3037 SmallVector<Value*,4> VarArgsToForward;
3038 SmallVector<AttributeSet, 4> VarArgsAttrs;
3039 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
3040 i < CB.arg_size(); i++) {
3041 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
3042 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
3043 }
3044
3045 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
3046 if (InlinedFunctionInfo.ContainsCalls) {
3047 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
3048 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
3049 CallSiteTailKind = CI->getTailCallKind();
3050
3051 // For inlining purposes, the "notail" marker is the same as no marker.
3052 if (CallSiteTailKind == CallInst::TCK_NoTail)
3053 CallSiteTailKind = CallInst::TCK_None;
3054
3055 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
3056 ++BB) {
3057 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
3058 CallInst *CI = dyn_cast<CallInst>(Val: &I);
3059 if (!CI)
3060 continue;
3061
3062 // Forward varargs from inlined call site to calls to the
3063 // ForwardVarArgsTo function, if requested, and to musttail calls.
3064 if (!VarArgsToForward.empty() &&
3065 ((ForwardVarArgsTo &&
3066 CI->getCalledFunction() == ForwardVarArgsTo) ||
3067 CI->isMustTailCall())) {
3068 // Collect attributes for non-vararg parameters.
3069 AttributeList Attrs = CI->getAttributes();
3070 SmallVector<AttributeSet, 8> ArgAttrs;
3071 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
3072 for (unsigned ArgNo = 0;
3073 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
3074 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
3075 }
3076
3077 // Add VarArg attributes.
3078 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
3079 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
3080 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
3081 // Add VarArgs to existing parameters.
3082 SmallVector<Value *, 6> Params(CI->args());
3083 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
3084 CallInst *NewCI = CallInst::Create(
3085 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
3086 NewCI->setDebugLoc(CI->getDebugLoc());
3087 NewCI->setAttributes(Attrs);
3088 NewCI->setCallingConv(CI->getCallingConv());
3089 CI->replaceAllUsesWith(V: NewCI);
3090 CI->eraseFromParent();
3091 CI = NewCI;
3092 }
3093
3094 if (Function *F = CI->getCalledFunction())
3095 InlinedDeoptimizeCalls |=
3096 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
3097
3098 // We need to reduce the strength of any inlined tail calls. For
3099 // musttail, we have to avoid introducing potential unbounded stack
3100 // growth. For example, if functions 'f' and 'g' are mutually recursive
3101 // with musttail, we can inline 'g' into 'f' so long as we preserve
3102 // musttail on the cloned call to 'f'. If either the inlined call site
3103 // or the cloned call site is *not* musttail, the program already has
3104 // one frame of stack growth, so it's safe to remove musttail. Here is
3105 // a table of example transformations:
3106 //
3107 // f -> musttail g -> musttail f ==> f -> musttail f
3108 // f -> musttail g -> tail f ==> f -> tail f
3109 // f -> g -> musttail f ==> f -> f
3110 // f -> g -> tail f ==> f -> f
3111 //
3112 // Inlined notail calls should remain notail calls.
3113 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
3114 if (ChildTCK != CallInst::TCK_NoTail)
3115 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
3116 CI->setTailCallKind(ChildTCK);
3117 InlinedMustTailCalls |= CI->isMustTailCall();
3118
3119 // Call sites inlined through a 'nounwind' call site should be
3120 // 'nounwind' as well. However, avoid marking call sites explicitly
3121 // where possible. This helps expose more opportunities for CSE after
3122 // inlining, commonly when the callee is an intrinsic.
3123 if (MarkNoUnwind && !CI->doesNotThrow())
3124 CI->setDoesNotThrow();
3125 }
3126 }
3127 }
3128
3129 // Leave lifetime markers for the static alloca's, scoping them to the
3130 // function we just inlined.
3131 // We need to insert lifetime intrinsics even at O0 to avoid invalid
3132 // access caused by multithreaded coroutines. The check
3133 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
3134 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
3135 !IFI.StaticAllocas.empty()) {
3136 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
3137 for (AllocaInst *AI : IFI.StaticAllocas) {
3138 // Don't mark swifterror allocas. They can't have bitcast uses.
3139 if (AI->isSwiftError())
3140 continue;
3141
3142 // If the alloca is already scoped to something smaller than the whole
3143 // function then there's no need to add redundant, less accurate markers.
3144 if (hasLifetimeMarkers(AI))
3145 continue;
3146
3147 std::optional<TypeSize> Size = AI->getAllocationSize(DL: AI->getDataLayout());
3148 if (Size && Size->isZero())
3149 continue;
3150
3151 builder.CreateLifetimeStart(Ptr: AI);
3152 for (ReturnInst *RI : Returns) {
3153 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
3154 // call and a return. The return kills all local allocas.
3155 if (InlinedMustTailCalls &&
3156 RI->getParent()->getTerminatingMustTailCall())
3157 continue;
3158 if (InlinedDeoptimizeCalls &&
3159 RI->getParent()->getTerminatingDeoptimizeCall())
3160 continue;
3161 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI);
3162 }
3163 }
3164 }
3165
3166 // If the inlined code contained dynamic alloca instructions, wrap the inlined
3167 // code with llvm.stacksave/llvm.stackrestore intrinsics.
3168 if (InlinedFunctionInfo.ContainsDynamicAllocas) {
3169 // Insert the llvm.stacksave.
3170 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
3171 .CreateStackSave(Name: "savedstack");
3172
3173 // Insert a call to llvm.stackrestore before any return instructions in the
3174 // inlined function.
3175 for (ReturnInst *RI : Returns) {
3176 // Don't insert llvm.stackrestore calls between a musttail or deoptimize
3177 // call and a return. The return will restore the stack pointer.
3178 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
3179 continue;
3180 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
3181 continue;
3182 IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr);
3183 }
3184 }
3185
3186 // If we are inlining for an invoke instruction, we must make sure to rewrite
3187 // any call instructions into invoke instructions. This is sensitive to which
3188 // funclet pads were top-level in the inlinee, so must be done before
3189 // rewriting the "parent pad" links.
3190 if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) {
3191 BasicBlock *UnwindDest = II->getUnwindDest();
3192 BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
3193 if (isa<LandingPadInst>(Val: FirstNonPHI)) {
3194 HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3195 } else {
3196 HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3197 }
3198 }
3199
3200 // Update the lexical scopes of the new funclets and callsites.
3201 // Anything that had 'none' as its parent is now nested inside the callsite's
3202 // EHPad.
3203 if (IFI.CallSiteEHPad) {
3204 for (Function::iterator BB = FirstNewBlock->getIterator(),
3205 E = Caller->end();
3206 BB != E; ++BB) {
3207 // Add bundle operands to inlined call sites.
3208 PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad: IFI.CallSiteEHPad);
3209
3210 // It is problematic if the inlinee has a cleanupret which unwinds to
3211 // caller and we inline it into a call site which doesn't unwind but into
3212 // an EH pad that does. Such an edge must be dynamically unreachable.
3213 // As such, we replace the cleanupret with unreachable.
3214 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator()))
3215 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
3216 changeToUnreachable(I: CleanupRet);
3217
3218 BasicBlock::iterator I = BB->getFirstNonPHIIt();
3219 if (!I->isEHPad())
3220 continue;
3221
3222 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
3223 if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad()))
3224 CatchSwitch->setParentPad(IFI.CallSiteEHPad);
3225 } else {
3226 auto *FPI = cast<FuncletPadInst>(Val&: I);
3227 if (isa<ConstantTokenNone>(Val: FPI->getParentPad()))
3228 FPI->setParentPad(IFI.CallSiteEHPad);
3229 }
3230 }
3231 }
3232
3233 if (InlinedDeoptimizeCalls) {
3234 // We need to at least remove the deoptimizing returns from the Return set,
3235 // so that the control flow from those returns does not get merged into the
3236 // caller (but terminate it instead). If the caller's return type does not
3237 // match the callee's return type, we also need to change the return type of
3238 // the intrinsic.
3239 if (Caller->getReturnType() == CB.getType()) {
3240 llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) {
3241 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
3242 });
3243 } else {
3244 SmallVector<ReturnInst *, 8> NormalReturns;
3245 Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
3246 M: Caller->getParent(), id: Intrinsic::experimental_deoptimize,
3247 OverloadTys: {Caller->getReturnType()});
3248
3249 for (ReturnInst *RI : Returns) {
3250 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
3251 if (!DeoptCall) {
3252 NormalReturns.push_back(Elt: RI);
3253 continue;
3254 }
3255
3256 // The calling convention on the deoptimize call itself may be bogus,
3257 // since the code we're inlining may have undefined behavior (and may
3258 // never actually execute at runtime); but all
3259 // @llvm.experimental.deoptimize declarations have to have the same
3260 // calling convention in a well-formed module.
3261 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
3262 NewDeoptIntrinsic->setCallingConv(CallingConv);
3263 auto *CurBB = RI->getParent();
3264 RI->eraseFromParent();
3265
3266 SmallVector<Value *, 4> CallArgs(DeoptCall->args());
3267
3268 SmallVector<OperandBundleDef, 1> OpBundles;
3269 DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles);
3270 auto DeoptAttributes = DeoptCall->getAttributes();
3271 DeoptCall->eraseFromParent();
3272 assert(!OpBundles.empty() &&
3273 "Expected at least the deopt operand bundle");
3274
3275 IRBuilder<> Builder(CurBB);
3276 CallInst *NewDeoptCall =
3277 Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles);
3278 NewDeoptCall->setCallingConv(CallingConv);
3279 NewDeoptCall->setAttributes(DeoptAttributes);
3280 if (NewDeoptCall->getType()->isVoidTy())
3281 Builder.CreateRetVoid();
3282 else
3283 Builder.CreateRet(V: NewDeoptCall);
3284 // Since the ret type is changed, remove the incompatible attributes.
3285 NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible(
3286 Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes()));
3287 }
3288
3289 // Leave behind the normal returns so we can merge control flow.
3290 std::swap(LHS&: Returns, RHS&: NormalReturns);
3291 }
3292 }
3293
3294 // Handle any inlined musttail call sites. In order for a new call site to be
3295 // musttail, the source of the clone and the inlined call site must have been
3296 // musttail. Therefore it's safe to return without merging control into the
3297 // phi below.
3298 if (InlinedMustTailCalls) {
3299 // Handle the returns preceded by musttail calls separately.
3300 SmallVector<ReturnInst *, 8> NormalReturns;
3301 for (ReturnInst *RI : Returns) {
3302 CallInst *ReturnedMustTail =
3303 RI->getParent()->getTerminatingMustTailCall();
3304 if (!ReturnedMustTail)
3305 NormalReturns.push_back(Elt: RI);
3306 }
3307
3308 // Leave behind the normal returns so we can merge control flow.
3309 std::swap(LHS&: Returns, RHS&: NormalReturns);
3310 }
3311
3312 // Now that all of the transforms on the inlined code have taken place but
3313 // before we splice the inlined code into the CFG and lose track of which
3314 // blocks were actually inlined, collect the call sites. We only do this if
3315 // call graph updates weren't requested, as those provide value handle based
3316 // tracking of inlined call sites instead. Calls to intrinsics are not
3317 // collected because they are not inlineable.
3318 if (InlinedFunctionInfo.ContainsCalls) {
3319 // Otherwise just collect the raw call sites that were inlined.
3320 for (BasicBlock &NewBB :
3321 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
3322 for (Instruction &I : NewBB)
3323 if (auto *CB = dyn_cast<CallBase>(Val: &I))
3324 if (!(CB->getCalledFunction() &&
3325 CB->getCalledFunction()->isIntrinsic()))
3326 IFI.InlinedCallSites.push_back(Elt: CB);
3327 }
3328
3329 for (CallBase *ICB : IFI.InlinedCallSites) {
3330 // We only track inline history if requested, or if the inlined call site
3331 // was originally an indirect call (it may have become a direct call
3332 // during inlining).
3333 if (TrackInlineHistory ||
3334 InlinedFunctionInfo.OriginallyIndirectCalls.contains(key: ICB)) {
3335 // !inline_history is {Callee, CB.inline_history, ICB.inline_history}.
3336 // Metadata nodes may be null if the referenced function was erased from
3337 // the module.
3338 SmallVector<Metadata *, 4> History;
3339 History.push_back(Elt: ValueAsMetadata::get(V: CalledFunc));
3340 if (MDNode *CBHistory = CB.getMetadata(KindID: LLVMContext::MD_inline_history)) {
3341 for (const auto &Op : CBHistory->operands()) {
3342 if (Op)
3343 History.push_back(Elt: Op.get());
3344 }
3345 }
3346 if (MDNode *CBHistory =
3347 ICB->getMetadata(KindID: LLVMContext::MD_inline_history)) {
3348 for (const auto &Op : CBHistory->operands()) {
3349 if (Op)
3350 History.push_back(Elt: Op.get());
3351 }
3352 }
3353 MDNode *NewHistory = MDNode::get(Context&: Caller->getContext(), MDs: History);
3354 ICB->setMetadata(KindID: LLVMContext::MD_inline_history, Node: NewHistory);
3355 }
3356 }
3357
3358 // If we cloned in _exactly one_ basic block, and if that block ends in a
3359 // return instruction, we splice the body of the inlined callee directly into
3360 // the calling basic block.
3361 if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) {
3362 // Move all of the instructions right before the call.
3363 OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(),
3364 FromEndIt: FirstNewBlock->end());
3365 // Remove the cloned basic block.
3366 Caller->back().eraseFromParent();
3367
3368 // If the call site was an invoke instruction, add a branch to the normal
3369 // destination.
3370 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3371 UncondBrInst *NewBr =
3372 UncondBrInst::Create(Target: II->getNormalDest(), InsertBefore: CB.getIterator());
3373 NewBr->setDebugLoc(Returns[0]->getDebugLoc());
3374 }
3375
3376 // If the return instruction returned a value, replace uses of the call with
3377 // uses of the returned value.
3378 if (!CB.use_empty()) {
3379 ReturnInst *R = Returns[0];
3380 if (&CB == R->getReturnValue())
3381 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3382 else
3383 CB.replaceAllUsesWith(V: R->getReturnValue());
3384 }
3385 // Since we are now done with the Call/Invoke, we can delete it.
3386 CB.eraseFromParent();
3387
3388 // Since we are now done with the return instruction, delete it also.
3389 Returns[0]->eraseFromParent();
3390
3391 if (MergeAttributes)
3392 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3393
3394 // We are now done with the inlining.
3395 return;
3396 }
3397
3398 // Otherwise, we have the normal case, of more than one block to inline or
3399 // multiple return sites.
3400
3401 // We want to clone the entire callee function into the hole between the
3402 // "starter" and "ender" blocks. How we accomplish this depends on whether
3403 // this is an invoke instruction or a call instruction.
3404 BasicBlock *AfterCallBB;
3405 UncondBrInst *CreatedBranchToNormalDest = nullptr;
3406 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3407
3408 // Add an unconditional branch to make this look like the CallInst case...
3409 CreatedBranchToNormalDest =
3410 UncondBrInst::Create(Target: II->getNormalDest(), InsertBefore: CB.getIterator());
3411 // We intend to replace this DebugLoc with another later.
3412 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary());
3413
3414 // Split the basic block. This guarantees that no PHI nodes will have to be
3415 // updated due to new incoming edges, and make the invoke case more
3416 // symmetric to the call case.
3417 AfterCallBB =
3418 OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(),
3419 BBName: CalledFunc->getName() + ".exit");
3420
3421 } else { // It's a call
3422 // If this is a call instruction, we need to split the basic block that
3423 // the call lives in.
3424 //
3425 AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(),
3426 BBName: CalledFunc->getName() + ".exit");
3427 }
3428
3429 if (IFI.CallerBFI) {
3430 // Copy original BB's block frequency to AfterCallBB
3431 IFI.CallerBFI->setBlockFreq(BB: AfterCallBB,
3432 Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB));
3433 }
3434
3435 // Change the branch that used to go to AfterCallBB to branch to the first
3436 // basic block of the inlined function.
3437 //
3438 UncondBrInst *Br = cast<UncondBrInst>(Val: OrigBB->getTerminator());
3439 Br->setSuccessor(&*FirstNewBlock);
3440
3441 // Now that the function is correct, make it a little bit nicer. In
3442 // particular, move the basic blocks inserted from the end of the function
3443 // into the space made by splitting the source basic block.
3444 Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock,
3445 FromEndIt: Caller->end());
3446
3447 // Handle all of the return instructions that we just cloned in, and eliminate
3448 // any users of the original call/invoke instruction.
3449 Type *RTy = CalledFunc->getReturnType();
3450
3451 PHINode *PHI = nullptr;
3452 if (Returns.size() > 1) {
3453 // The PHI node should go at the front of the new basic block to merge all
3454 // possible incoming values.
3455 if (!CB.use_empty()) {
3456 PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName());
3457 PHI->insertBefore(InsertPos: AfterCallBB->begin());
3458 // Anything that used the result of the function call should now use the
3459 // PHI node as their operand.
3460 CB.replaceAllUsesWith(V: PHI);
3461 }
3462
3463 // Loop over all of the return instructions adding entries to the PHI node
3464 // as appropriate.
3465 if (PHI) {
3466 for (ReturnInst *RI : Returns) {
3467 assert(RI->getReturnValue()->getType() == PHI->getType() &&
3468 "Ret value not consistent in function!");
3469 PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent());
3470 }
3471 }
3472
3473 // Add a branch to the merge points and remove return instructions.
3474 DebugLoc Loc;
3475 for (ReturnInst *RI : Returns) {
3476 UncondBrInst *BI = UncondBrInst::Create(Target: AfterCallBB, InsertBefore: RI->getIterator());
3477 Loc = RI->getDebugLoc();
3478 BI->setDebugLoc(Loc);
3479 RI->eraseFromParent();
3480 }
3481 // We need to set the debug location to *somewhere* inside the
3482 // inlined function. The line number may be nonsensical, but the
3483 // instruction will at least be associated with the right
3484 // function.
3485 if (CreatedBranchToNormalDest)
3486 CreatedBranchToNormalDest->setDebugLoc(Loc);
3487 } else if (!Returns.empty()) {
3488 // Otherwise, if there is exactly one return value, just replace anything
3489 // using the return value of the call with the computed value.
3490 if (!CB.use_empty()) {
3491 if (&CB == Returns[0]->getReturnValue())
3492 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3493 else
3494 CB.replaceAllUsesWith(V: Returns[0]->getReturnValue());
3495 }
3496
3497 // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
3498 BasicBlock *ReturnBB = Returns[0]->getParent();
3499 ReturnBB->replaceAllUsesWith(V: AfterCallBB);
3500
3501 // Splice the code from the return block into the block that it will return
3502 // to, which contains the code that was after the call.
3503 AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB);
3504
3505 if (CreatedBranchToNormalDest)
3506 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
3507
3508 // Delete the return instruction now and empty ReturnBB now.
3509 Returns[0]->eraseFromParent();
3510 ReturnBB->eraseFromParent();
3511 } else if (!CB.use_empty()) {
3512 // In this case there are no returns to use, so there is no clear source
3513 // location for the "return".
3514 // FIXME: It may be correct to use the scope end line of the function here,
3515 // since this likely means we are falling out of the function.
3516 if (CreatedBranchToNormalDest)
3517 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown());
3518 // No returns, but something is using the return value of the call. Just
3519 // nuke the result.
3520 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3521 }
3522
3523 // Since we are now done with the Call/Invoke, we can delete it.
3524 CB.eraseFromParent();
3525
3526 // If we inlined any musttail calls and the original return is now
3527 // unreachable, delete it. It can only contain a ret.
3528 if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB))
3529 AfterCallBB->eraseFromParent();
3530
3531 // We should always be able to fold the entry block of the function into the
3532 // single predecessor of the block...
3533 BasicBlock *CalleeEntry = Br->getSuccessor();
3534
3535 // Splice the code entry block into calling block, right before the
3536 // unconditional branch.
3537 CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes
3538 OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry);
3539
3540 // Remove the unconditional branch.
3541 Br->eraseFromParent();
3542
3543 // Now we can remove the CalleeEntry block, which is now empty.
3544 CalleeEntry->eraseFromParent();
3545
3546 // If we inserted a phi node, check to see if it has a single value (e.g. all
3547 // the entries are the same or undef). If so, remove the PHI so it doesn't
3548 // block other optimizations.
3549 if (PHI) {
3550 AssumptionCache *AC =
3551 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
3552 auto &DL = Caller->getDataLayout();
3553 if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) {
3554 PHI->replaceAllUsesWith(V);
3555 PHI->eraseFromParent();
3556 }
3557 }
3558
3559 if (MergeAttributes)
3560 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3561}
3562
3563llvm::InlineResult llvm::InlineFunction(
3564 CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes,
3565 AAResults *CalleeAAR, bool InsertLifetime, bool TrackInlineHistory,
3566 Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) {
3567 llvm::InlineResult Result = CanInlineCallSite(CB, IFI);
3568 if (Result.isSuccess()) {
3569 InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
3570 TrackInlineHistory, ForwardVarArgsTo, ORE);
3571 }
3572
3573 return Result;
3574}
3575