1//===- InlineFunction.cpp - Code to perform function inlining -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inlining of a function into a call site, resolving
10// parameters and the return value as appropriate.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SetVector.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/CallGraph.h"
25#include "llvm/Analysis/CaptureTracking.h"
26#include "llvm/Analysis/CtxProfAnalysis.h"
27#include "llvm/Analysis/IndirectCallVisitor.h"
28#include "llvm/Analysis/InstructionSimplify.h"
29#include "llvm/Analysis/MemoryProfileInfo.h"
30#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/AttributeMask.h"
37#include "llvm/IR/Attributes.h"
38#include "llvm/IR/BasicBlock.h"
39#include "llvm/IR/CFG.h"
40#include "llvm/IR/Constant.h"
41#include "llvm/IR/ConstantRange.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/DebugInfoMetadata.h"
46#include "llvm/IR/DebugLoc.h"
47#include "llvm/IR/DerivedTypes.h"
48#include "llvm/IR/Dominators.h"
49#include "llvm/IR/EHPersonalities.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalVariable.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InlineAsm.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/IntrinsicInst.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/LLVMContext.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/Metadata.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/PatternMatch.h"
64#include "llvm/IR/ProfDataUtils.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/User.h"
67#include "llvm/IR/Value.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72#include "llvm/Transforms/Utils/Cloning.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/ValueMapper.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <deque>
79#include <iterator>
80#include <optional>
81#include <string>
82#include <utility>
83#include <vector>
84
85#define DEBUG_TYPE "inline-function"
86
87using namespace llvm;
88using namespace llvm::memprof;
89using ProfileCount = Function::ProfileCount;
90
91static cl::opt<bool>
92EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(Val: true),
93 cl::Hidden,
94 cl::desc("Convert noalias attributes to metadata during inlining."));
95
96static cl::opt<bool>
97 UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
98 cl::init(Val: true),
99 cl::desc("Use the llvm.experimental.noalias.scope.decl "
100 "intrinsic during inlining."));
101
102// Disabled by default, because the added alignment assumptions may increase
103// compile-time and block optimizations. This option is not suitable for use
104// with frontends that emit comprehensive parameter alignment annotations.
105static cl::opt<bool>
106PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
107 cl::init(Val: false), cl::Hidden,
108 cl::desc("Convert align attributes to assumptions during inlining."));
109
110static cl::opt<unsigned> InlinerAttributeWindow(
111 "max-inst-checked-for-throw-during-inlining", cl::Hidden,
112 cl::desc("the maximum number of instructions analyzed for may throw during "
113 "attribute inference in inlined body"),
114 cl::init(Val: 4));
115
116namespace {
117
118 /// A class for recording information about inlining a landing pad.
119 class LandingPadInliningInfo {
120 /// Destination of the invoke's unwind.
121 BasicBlock *OuterResumeDest;
122
123 /// Destination for the callee's resume.
124 BasicBlock *InnerResumeDest = nullptr;
125
126 /// LandingPadInst associated with the invoke.
127 LandingPadInst *CallerLPad = nullptr;
128
129 /// PHI for EH values from landingpad insts.
130 PHINode *InnerEHValuesPHI = nullptr;
131
132 SmallVector<Value*, 8> UnwindDestPHIValues;
133
134 public:
135 LandingPadInliningInfo(InvokeInst *II)
136 : OuterResumeDest(II->getUnwindDest()) {
137 // If there are PHI nodes in the unwind destination block, we need to keep
138 // track of which values came into them from the invoke before removing
139 // the edge from this block.
140 BasicBlock *InvokeBB = II->getParent();
141 BasicBlock::iterator I = OuterResumeDest->begin();
142 for (; isa<PHINode>(Val: I); ++I) {
143 // Save the value to use for this edge.
144 PHINode *PHI = cast<PHINode>(Val&: I);
145 UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB));
146 }
147
148 CallerLPad = cast<LandingPadInst>(Val&: I);
149 }
150
151 /// The outer unwind destination is the target of
152 /// unwind edges introduced for calls within the inlined function.
153 BasicBlock *getOuterResumeDest() const {
154 return OuterResumeDest;
155 }
156
157 BasicBlock *getInnerResumeDest();
158
159 LandingPadInst *getLandingPadInst() const { return CallerLPad; }
160
161 /// Forward the 'resume' instruction to the caller's landing pad block.
162 /// When the landing pad block has only one predecessor, this is
163 /// a simple branch. When there is more than one predecessor, we need to
164 /// split the landing pad block after the landingpad instruction and jump
165 /// to there.
166 void forwardResume(ResumeInst *RI,
167 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
168
169 /// Add incoming-PHI values to the unwind destination block for the given
170 /// basic block, using the values for the original invoke's source block.
171 void addIncomingPHIValuesFor(BasicBlock *BB) const {
172 addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest);
173 }
174
175 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
176 BasicBlock::iterator I = dest->begin();
177 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
178 PHINode *phi = cast<PHINode>(Val&: I);
179 phi->addIncoming(V: UnwindDestPHIValues[i], BB: src);
180 }
181 }
182 };
183} // end anonymous namespace
184
185static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
186 BasicBlock::iterator It = BB.getFirstNonPHIIt();
187 while (It != BB.end()) {
188 if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) {
189 if (IntrinsicCall->isEntry()) {
190 return IntrinsicCall;
191 }
192 }
193 It = std::next(x: It);
194 }
195 return nullptr;
196}
197
198/// Get or create a target for the branch from ResumeInsts.
199BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
200 if (InnerResumeDest) return InnerResumeDest;
201
202 // Split the landing pad.
203 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
204 InnerResumeDest =
205 OuterResumeDest->splitBasicBlock(I: SplitPoint,
206 BBName: OuterResumeDest->getName() + ".body");
207
208 // The number of incoming edges we expect to the inner landing pad.
209 const unsigned PHICapacity = 2;
210
211 // Create corresponding new PHIs for all the PHIs in the outer landing pad.
212 BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
213 BasicBlock::iterator I = OuterResumeDest->begin();
214 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
215 PHINode *OuterPHI = cast<PHINode>(Val&: I);
216 PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity,
217 NameStr: OuterPHI->getName() + ".lpad-body");
218 InnerPHI->insertBefore(InsertPos: InsertPoint);
219 OuterPHI->replaceAllUsesWith(V: InnerPHI);
220 InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest);
221 }
222
223 // Create a PHI for the exception values.
224 InnerEHValuesPHI =
225 PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body");
226 InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint);
227 CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI);
228 InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest);
229
230 // All done.
231 return InnerResumeDest;
232}
233
234/// Forward the 'resume' instruction to the caller's landing pad block.
235/// When the landing pad block has only one predecessor, this is a simple
236/// branch. When there is more than one predecessor, we need to split the
237/// landing pad block after the landingpad instruction and jump to there.
238void LandingPadInliningInfo::forwardResume(
239 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
240 BasicBlock *Dest = getInnerResumeDest();
241 BasicBlock *Src = RI->getParent();
242
243 auto *BI = UncondBrInst::Create(Target: Dest, InsertBefore: Src);
244 BI->setDebugLoc(RI->getDebugLoc());
245
246 // Update the PHIs in the destination. They were inserted in an order which
247 // makes this work.
248 addIncomingPHIValuesForInto(src: Src, dest: Dest);
249
250 InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src);
251 RI->eraseFromParent();
252}
253
254/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
255static Value *getParentPad(Value *EHPad) {
256 if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad))
257 return FPI->getParentPad();
258 return cast<CatchSwitchInst>(Val: EHPad)->getParentPad();
259}
260
261using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
262
263/// Helper for getUnwindDestToken that does the descendant-ward part of
264/// the search.
265static Value *getUnwindDestTokenHelper(Instruction *EHPad,
266 UnwindDestMemoTy &MemoMap) {
267 SmallVector<Instruction *, 8> Worklist(1, EHPad);
268
269 while (!Worklist.empty()) {
270 Instruction *CurrentPad = Worklist.pop_back_val();
271 // We only put pads on the worklist that aren't in the MemoMap. When
272 // we find an unwind dest for a pad we may update its ancestors, but
273 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
274 // so they should never get updated while queued on the worklist.
275 assert(!MemoMap.count(CurrentPad));
276 Value *UnwindDestToken = nullptr;
277 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) {
278 if (CatchSwitch->hasUnwindDest()) {
279 UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
280 } else {
281 // Catchswitch doesn't have a 'nounwind' variant, and one might be
282 // annotated as "unwinds to caller" when really it's nounwind (see
283 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
284 // parent's unwind dest from this. We can check its catchpads'
285 // descendants, since they might include a cleanuppad with an
286 // "unwinds to caller" cleanupret, which can be trusted.
287 for (auto HI = CatchSwitch->handler_begin(),
288 HE = CatchSwitch->handler_end();
289 HI != HE && !UnwindDestToken; ++HI) {
290 BasicBlock *HandlerBlock = *HI;
291 auto *CatchPad =
292 cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt());
293 for (User *Child : CatchPad->users()) {
294 // Intentionally ignore invokes here -- since the catchswitch is
295 // marked "unwind to caller", it would be a verifier error if it
296 // contained an invoke which unwinds out of it, so any invoke we'd
297 // encounter must unwind to some child of the catch.
298 if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child))
299 continue;
300
301 Instruction *ChildPad = cast<Instruction>(Val: Child);
302 auto Memo = MemoMap.find(Val: ChildPad);
303 if (Memo == MemoMap.end()) {
304 // Haven't figured out this child pad yet; queue it.
305 Worklist.push_back(Elt: ChildPad);
306 continue;
307 }
308 // We've already checked this child, but might have found that
309 // it offers no proof either way.
310 Value *ChildUnwindDestToken = Memo->second;
311 if (!ChildUnwindDestToken)
312 continue;
313 // We already know the child's unwind dest, which can either
314 // be ConstantTokenNone to indicate unwind to caller, or can
315 // be another child of the catchpad. Only the former indicates
316 // the unwind dest of the catchswitch.
317 if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) {
318 UnwindDestToken = ChildUnwindDestToken;
319 break;
320 }
321 assert(getParentPad(ChildUnwindDestToken) == CatchPad);
322 }
323 }
324 }
325 } else {
326 auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad);
327 for (User *U : CleanupPad->users()) {
328 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) {
329 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
330 UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
331 else
332 UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext());
333 break;
334 }
335 Value *ChildUnwindDestToken;
336 if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) {
337 ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
338 } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) {
339 Instruction *ChildPad = cast<Instruction>(Val: U);
340 auto Memo = MemoMap.find(Val: ChildPad);
341 if (Memo == MemoMap.end()) {
342 // Haven't resolved this child yet; queue it and keep searching.
343 Worklist.push_back(Elt: ChildPad);
344 continue;
345 }
346 // We've checked this child, but still need to ignore it if it
347 // had no proof either way.
348 ChildUnwindDestToken = Memo->second;
349 if (!ChildUnwindDestToken)
350 continue;
351 } else {
352 // Not a relevant user of the cleanuppad
353 continue;
354 }
355 // In a well-formed program, the child/invoke must either unwind to
356 // an(other) child of the cleanup, or exit the cleanup. In the
357 // first case, continue searching.
358 if (isa<Instruction>(Val: ChildUnwindDestToken) &&
359 getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad)
360 continue;
361 UnwindDestToken = ChildUnwindDestToken;
362 break;
363 }
364 }
365 // If we haven't found an unwind dest for CurrentPad, we may have queued its
366 // children, so move on to the next in the worklist.
367 if (!UnwindDestToken)
368 continue;
369
370 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
371 // any ancestors of CurrentPad up to but not including UnwindDestToken's
372 // parent pad. Record this in the memo map, and check to see if the
373 // original EHPad being queried is one of the ones exited.
374 Value *UnwindParent;
375 if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken))
376 UnwindParent = getParentPad(EHPad: UnwindPad);
377 else
378 UnwindParent = nullptr;
379 bool ExitedOriginalPad = false;
380 for (Instruction *ExitedPad = CurrentPad;
381 ExitedPad && ExitedPad != UnwindParent;
382 ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) {
383 // Skip over catchpads since they just follow their catchswitches.
384 if (isa<CatchPadInst>(Val: ExitedPad))
385 continue;
386 MemoMap[ExitedPad] = UnwindDestToken;
387 ExitedOriginalPad |= (ExitedPad == EHPad);
388 }
389
390 if (ExitedOriginalPad)
391 return UnwindDestToken;
392
393 // Continue the search.
394 }
395
396 // No definitive information is contained within this funclet.
397 return nullptr;
398}
399
400/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
401/// return that pad instruction. If it unwinds to caller, return
402/// ConstantTokenNone. If it does not have a definitive unwind destination,
403/// return nullptr.
404///
405/// This routine gets invoked for calls in funclets in inlinees when inlining
406/// an invoke. Since many funclets don't have calls inside them, it's queried
407/// on-demand rather than building a map of pads to unwind dests up front.
408/// Determining a funclet's unwind dest may require recursively searching its
409/// descendants, and also ancestors and cousins if the descendants don't provide
410/// an answer. Since most funclets will have their unwind dest immediately
411/// available as the unwind dest of a catchswitch or cleanupret, this routine
412/// searches top-down from the given pad and then up. To avoid worst-case
413/// quadratic run-time given that approach, it uses a memo map to avoid
414/// re-processing funclet trees. The callers that rewrite the IR as they go
415/// take advantage of this, for correctness, by checking/forcing rewritten
416/// pads' entries to match the original callee view.
417static Value *getUnwindDestToken(Instruction *EHPad,
418 UnwindDestMemoTy &MemoMap) {
419 // Catchpads unwind to the same place as their catchswitch;
420 // redirct any queries on catchpads so the code below can
421 // deal with just catchswitches and cleanuppads.
422 if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad))
423 EHPad = CPI->getCatchSwitch();
424
425 // Check if we've already determined the unwind dest for this pad.
426 auto Memo = MemoMap.find(Val: EHPad);
427 if (Memo != MemoMap.end())
428 return Memo->second;
429
430 // Search EHPad and, if necessary, its descendants.
431 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
432 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
433 if (UnwindDestToken)
434 return UnwindDestToken;
435
436 // No information is available for this EHPad from itself or any of its
437 // descendants. An unwind all the way out to a pad in the caller would
438 // need also to agree with the unwind dest of the parent funclet, so
439 // search up the chain to try to find a funclet with information. Put
440 // null entries in the memo map to avoid re-processing as we go up.
441 MemoMap[EHPad] = nullptr;
442#ifndef NDEBUG
443 SmallPtrSet<Instruction *, 4> TempMemos;
444 TempMemos.insert(EHPad);
445#endif
446 Instruction *LastUselessPad = EHPad;
447 Value *AncestorToken;
448 for (AncestorToken = getParentPad(EHPad);
449 auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken);
450 AncestorToken = getParentPad(EHPad: AncestorToken)) {
451 // Skip over catchpads since they just follow their catchswitches.
452 if (isa<CatchPadInst>(Val: AncestorPad))
453 continue;
454 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
455 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
456 // call to getUnwindDestToken, that would mean that AncestorPad had no
457 // information in itself, its descendants, or its ancestors. If that
458 // were the case, then we should also have recorded the lack of information
459 // for the descendant that we're coming from. So assert that we don't
460 // find a null entry in the MemoMap for AncestorPad.
461 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
462 auto AncestorMemo = MemoMap.find(Val: AncestorPad);
463 if (AncestorMemo == MemoMap.end()) {
464 UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap);
465 } else {
466 UnwindDestToken = AncestorMemo->second;
467 }
468 if (UnwindDestToken)
469 break;
470 LastUselessPad = AncestorPad;
471 MemoMap[LastUselessPad] = nullptr;
472#ifndef NDEBUG
473 TempMemos.insert(LastUselessPad);
474#endif
475 }
476
477 // We know that getUnwindDestTokenHelper was called on LastUselessPad and
478 // returned nullptr (and likewise for EHPad and any of its ancestors up to
479 // LastUselessPad), so LastUselessPad has no information from below. Since
480 // getUnwindDestTokenHelper must investigate all downward paths through
481 // no-information nodes to prove that a node has no information like this,
482 // and since any time it finds information it records it in the MemoMap for
483 // not just the immediately-containing funclet but also any ancestors also
484 // exited, it must be the case that, walking downward from LastUselessPad,
485 // visiting just those nodes which have not been mapped to an unwind dest
486 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
487 // they are just used to keep getUnwindDestTokenHelper from repeating work),
488 // any node visited must have been exhaustively searched with no information
489 // for it found.
490 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
491 while (!Worklist.empty()) {
492 Instruction *UselessPad = Worklist.pop_back_val();
493 auto Memo = MemoMap.find(Val: UselessPad);
494 if (Memo != MemoMap.end() && Memo->second) {
495 // Here the name 'UselessPad' is a bit of a misnomer, because we've found
496 // that it is a funclet that does have information about unwinding to
497 // a particular destination; its parent was a useless pad.
498 // Since its parent has no information, the unwind edge must not escape
499 // the parent, and must target a sibling of this pad. This local unwind
500 // gives us no information about EHPad. Leave it and the subtree rooted
501 // at it alone.
502 assert(getParentPad(Memo->second) == getParentPad(UselessPad));
503 continue;
504 }
505 // We know we don't have information for UselesPad. If it has an entry in
506 // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
507 // added on this invocation of getUnwindDestToken; if a previous invocation
508 // recorded nullptr, it would have had to prove that the ancestors of
509 // UselessPad, which include LastUselessPad, had no information, and that
510 // in turn would have required proving that the descendants of
511 // LastUselesPad, which include EHPad, have no information about
512 // LastUselessPad, which would imply that EHPad was mapped to nullptr in
513 // the MemoMap on that invocation, which isn't the case if we got here.
514 assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
515 // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
516 // information that we'd be contradicting by making a map entry for it
517 // (which is something that getUnwindDestTokenHelper must have proved for
518 // us to get here). Just assert on is direct users here; the checks in
519 // this downward walk at its descendants will verify that they don't have
520 // any unwind edges that exit 'UselessPad' either (i.e. they either have no
521 // unwind edges or unwind to a sibling).
522 MemoMap[UselessPad] = UnwindDestToken;
523 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) {
524 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
525 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
526 auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
527 for (User *U : CatchPad->users()) {
528 assert((!isa<InvokeInst>(U) ||
529 (getParentPad(&*cast<InvokeInst>(U)
530 ->getUnwindDest()
531 ->getFirstNonPHIIt()) == CatchPad)) &&
532 "Expected useless pad");
533 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
534 Worklist.push_back(Elt: cast<Instruction>(Val: U));
535 }
536 }
537 } else {
538 assert(isa<CleanupPadInst>(UselessPad));
539 for (User *U : UselessPad->users()) {
540 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
541 assert(
542 (!isa<InvokeInst>(U) ||
543 (getParentPad(
544 &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
545 UselessPad)) &&
546 "Expected useless pad");
547 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
548 Worklist.push_back(Elt: cast<Instruction>(Val: U));
549 }
550 }
551 }
552
553 return UnwindDestToken;
554}
555
556/// When we inline a basic block into an invoke,
557/// we have to turn all of the calls that can throw into invokes.
558/// This function analyze BB to see if there are any calls, and if so,
559/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
560/// nodes in that block with the values specified in InvokeDestPHIValues.
561static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
562 BasicBlock *BB, BasicBlock *UnwindEdge,
563 UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
564 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
565 // We only need to check for function calls: inlined invoke
566 // instructions require no special handling.
567 CallInst *CI = dyn_cast<CallInst>(Val: &I);
568
569 if (!CI || CI->doesNotThrow())
570 continue;
571
572 // We do not need to (and in fact, cannot) convert possibly throwing calls
573 // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
574 // invokes. The caller's "segment" of the deoptimization continuation
575 // attached to the newly inlined @llvm.experimental_deoptimize
576 // (resp. @llvm.experimental.guard) call should contain the exception
577 // handling logic, if any.
578 if (auto *F = CI->getCalledFunction())
579 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
580 F->getIntrinsicID() == Intrinsic::experimental_guard)
581 continue;
582
583 if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) {
584 // This call is nested inside a funclet. If that funclet has an unwind
585 // destination within the inlinee, then unwinding out of this call would
586 // be UB. Rewriting this call to an invoke which targets the inlined
587 // invoke's unwind dest would give the call's parent funclet multiple
588 // unwind destinations, which is something that subsequent EH table
589 // generation can't handle and that the veirifer rejects. So when we
590 // see such a call, leave it as a call.
591 auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]);
592 Value *UnwindDestToken =
593 getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap);
594 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
595 continue;
596#ifndef NDEBUG
597 Instruction *MemoKey;
598 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
599 MemoKey = CatchPad->getCatchSwitch();
600 else
601 MemoKey = FuncletPad;
602 assert(FuncletUnwindMap->count(MemoKey) &&
603 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
604 "must get memoized to avoid confusing later searches");
605#endif // NDEBUG
606 }
607
608 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
609 return BB;
610 }
611 return nullptr;
612}
613
614/// If we inlined an invoke site, we need to convert calls
615/// in the body of the inlined function into invokes.
616///
617/// II is the invoke instruction being inlined. FirstNewBlock is the first
618/// block of the inlined code (the last block is the end of the function),
619/// and InlineCodeInfo is information about the code that got inlined.
620static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
621 ClonedCodeInfo &InlinedCodeInfo) {
622 BasicBlock *InvokeDest = II->getUnwindDest();
623
624 Function *Caller = FirstNewBlock->getParent();
625
626 // The inlined code is currently at the end of the function, scan from the
627 // start of the inlined code to its end, checking for stuff we need to
628 // rewrite.
629 LandingPadInliningInfo Invoke(II);
630
631 // Get all of the inlined landing pad instructions.
632 SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
633 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
634 I != E; ++I)
635 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator()))
636 InlinedLPads.insert(Ptr: II->getLandingPadInst());
637
638 // Append the clauses from the outer landing pad instruction into the inlined
639 // landing pad instructions.
640 LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
641 for (LandingPadInst *InlinedLPad : InlinedLPads) {
642 unsigned OuterNum = OuterLPad->getNumClauses();
643 InlinedLPad->reserveClauses(Size: OuterNum);
644 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
645 InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx));
646 if (OuterLPad->isCleanup())
647 InlinedLPad->setCleanup(true);
648 }
649
650 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
651 BB != E; ++BB) {
652 if (InlinedCodeInfo.ContainsCalls)
653 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
654 BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest()))
655 // Update any PHI nodes in the exceptional block to indicate that there
656 // is now a new entry in them.
657 Invoke.addIncomingPHIValuesFor(BB: NewBB);
658
659 // Forward any resumes that are remaining here.
660 if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator()))
661 Invoke.forwardResume(RI, InlinedLPads);
662 }
663
664 // Now that everything is happy, we have one final detail. The PHI nodes in
665 // the exception destination block still have entries due to the original
666 // invoke instruction. Eliminate these entries (which might even delete the
667 // PHI node) now.
668 InvokeDest->removePredecessor(Pred: II->getParent());
669}
670
671/// If we inlined an invoke site, we need to convert calls
672/// in the body of the inlined function into invokes.
673///
674/// II is the invoke instruction being inlined. FirstNewBlock is the first
675/// block of the inlined code (the last block is the end of the function),
676/// and InlineCodeInfo is information about the code that got inlined.
677static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
678 ClonedCodeInfo &InlinedCodeInfo) {
679 BasicBlock *UnwindDest = II->getUnwindDest();
680 Function *Caller = FirstNewBlock->getParent();
681
682 assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");
683
684 // If there are PHI nodes in the unwind destination block, we need to keep
685 // track of which values came into them from the invoke before removing the
686 // edge from this block.
687 SmallVector<Value *, 8> UnwindDestPHIValues;
688 BasicBlock *InvokeBB = II->getParent();
689 for (PHINode &PHI : UnwindDest->phis()) {
690 // Save the value to use for this edge.
691 UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB));
692 }
693
694 // Add incoming-PHI values to the unwind destination block for the given basic
695 // block, using the values for the original invoke's source block.
696 auto UpdatePHINodes = [&](BasicBlock *Src) {
697 BasicBlock::iterator I = UnwindDest->begin();
698 for (Value *V : UnwindDestPHIValues) {
699 PHINode *PHI = cast<PHINode>(Val&: I);
700 PHI->addIncoming(V, BB: Src);
701 ++I;
702 }
703 };
704
705 // This connects all the instructions which 'unwind to caller' to the invoke
706 // destination.
707 UnwindDestMemoTy FuncletUnwindMap;
708 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
709 BB != E; ++BB) {
710 if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) {
711 if (CRI->unwindsToCaller()) {
712 auto *CleanupPad = CRI->getCleanupPad();
713 CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator());
714 CRI->eraseFromParent();
715 UpdatePHINodes(&*BB);
716 // Finding a cleanupret with an unwind destination would confuse
717 // subsequent calls to getUnwindDestToken, so map the cleanuppad
718 // to short-circuit any such calls and recognize this as an "unwind
719 // to caller" cleanup.
720 assert(!FuncletUnwindMap.count(CleanupPad) ||
721 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
722 FuncletUnwindMap[CleanupPad] =
723 ConstantTokenNone::get(Context&: Caller->getContext());
724 }
725 }
726
727 BasicBlock::iterator I = BB->getFirstNonPHIIt();
728 if (!I->isEHPad())
729 continue;
730
731 Instruction *Replacement = nullptr;
732 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
733 if (CatchSwitch->unwindsToCaller()) {
734 Value *UnwindDestToken;
735 if (auto *ParentPad =
736 dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) {
737 // This catchswitch is nested inside another funclet. If that
738 // funclet has an unwind destination within the inlinee, then
739 // unwinding out of this catchswitch would be UB. Rewriting this
740 // catchswitch to unwind to the inlined invoke's unwind dest would
741 // give the parent funclet multiple unwind destinations, which is
742 // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
744 // as "unwind to caller".
745 UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap);
746 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
747 continue;
748 } else {
749 // This catchswitch has no parent to inherit constraints from, and
750 // none of its descendants can have an unwind edge that exits it and
751 // targets another funclet in the inlinee. It may or may not have a
752 // descendant that definitively has an unwind to caller. In either
753 // case, we'll have to assume that any unwinds out of it may need to
754 // be routed to the caller, so treat it as though it has a definitive
755 // unwind to caller.
756 UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext());
757 }
758 auto *NewCatchSwitch = CatchSwitchInst::Create(
759 ParentPad: CatchSwitch->getParentPad(), UnwindDest,
760 NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(),
761 InsertBefore: CatchSwitch->getIterator());
762 for (BasicBlock *PadBB : CatchSwitch->handlers())
763 NewCatchSwitch->addHandler(Dest: PadBB);
764 // Propagate info for the old catchswitch over to the new one in
765 // the unwind map. This also serves to short-circuit any subsequent
766 // checks for the unwind dest of this catchswitch, which would get
767 // confused if they found the outer handler in the callee.
768 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
769 Replacement = NewCatchSwitch;
770 }
771 } else if (!isa<FuncletPadInst>(Val: I)) {
772 llvm_unreachable("unexpected EHPad!");
773 }
774
775 if (Replacement) {
776 Replacement->takeName(V: &*I);
777 I->replaceAllUsesWith(V: Replacement);
778 I->eraseFromParent();
779 UpdatePHINodes(&*BB);
780 }
781 }
782
783 if (InlinedCodeInfo.ContainsCalls)
784 for (Function::iterator BB = FirstNewBlock->getIterator(),
785 E = Caller->end();
786 BB != E; ++BB)
787 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
788 BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap))
789 // Update any PHI nodes in the exceptional block to indicate that there
790 // is now a new entry in them.
791 UpdatePHINodes(NewBB);
792
793 // Now that everything is happy, we have one final detail. The PHI nodes in
794 // the exception destination block still have entries due to the original
795 // invoke instruction. Eliminate these entries (which might even delete the
796 // PHI node) now.
797 UnwindDest->removePredecessor(Pred: InvokeBB);
798}
799
800static bool haveCommonPrefix(MDNode *MIBStackContext,
801 MDNode *CallsiteStackContext) {
802 assert(MIBStackContext->getNumOperands() > 0 &&
803 CallsiteStackContext->getNumOperands() > 0);
804 // Because of the context trimming performed during matching, the callsite
805 // context could have more stack ids than the MIB. We match up to the end of
806 // the shortest stack context.
807 for (auto MIBStackIter = MIBStackContext->op_begin(),
808 CallsiteStackIter = CallsiteStackContext->op_begin();
809 MIBStackIter != MIBStackContext->op_end() &&
810 CallsiteStackIter != CallsiteStackContext->op_end();
811 MIBStackIter++, CallsiteStackIter++) {
812 auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter);
813 auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter);
814 assert(Val1 && Val2);
815 if (Val1->getZExtValue() != Val2->getZExtValue())
816 return false;
817 }
818 return true;
819}
820
// Strip any !memprof metadata from \p Call.
static void removeMemProfMetadata(CallBase *Call) {
  Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
}
824
// Strip any !callsite metadata from \p Call.
static void removeCallsiteMetadata(CallBase *Call) {
  Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
}
828
// Rebuild the !memprof metadata on \p CI from the MIB nodes in \p MIBList
// (must be non-empty). Depending on what buildAndAttachMIBMetadata decides,
// the result is either fresh !memprof metadata or a single allocation-type
// function attribute, in which case the !callsite metadata is dropped too.
static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList,
                                  OptimizationRemarkEmitter *ORE) {
  assert(!MIBList.empty());
  // Remove existing memprof, which will either be replaced or may not be needed
  // if we are able to use a single allocation type function attribute.
  removeMemProfMetadata(Call: CI);
  // Re-add each surviving MIB to a fresh trie, then attach the rebuilt
  // metadata (or attribute) to the call.
  CallStackTrie CallStack(ORE);
  for (Metadata *MIB : MIBList)
    CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB));
  bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
  assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
  if (!MemprofMDAttached)
    // If we used a function attribute remove the callsite metadata as well.
    removeCallsiteMetadata(Call: CI);
}
845
// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD,
                                   OptimizationRemarkEmitter *ORE) {
  MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite);
  MDNode *ClonedCallsiteMD = nullptr;
  // Check if the call originally had callsite metadata, and update it for the
  // new call in the inlined body.
  if (OrigCallsiteMD) {
    // The cloned call's context is now the concatenation of the original call's
    // callsite metadata and the callsite metadata on the call where it was
    // inlined.
    ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD);
    ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD);
  }

  // Update any memprof metadata on the cloned call.
  MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof);
  if (!OrigMemProfMD)
    return;
  // We currently expect that allocations with memprof metadata also have
  // callsite metadata for the allocation's part of the context.
  assert(OrigCallsiteMD);

  // New call's MIB list.
  std::vector<Metadata *> NewMIBList;

  // For each MIB metadata, check if its call stack context starts with the
  // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
  // the inlined body. If not, it stays on the out-of-line original call.
  for (auto &MIBOp : OrigMemProfMD->operands()) {
    MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp);
    // Stack is first operand of MIB.
    MDNode *StackMD = getMIBStackNode(MIB);
    assert(StackMD);
    // See if the new cloned callsite context matches this profiled context.
    if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD))
      // Add it to the cloned call's MIB list.
      NewMIBList.push_back(x: MIB);
  }
  // No profiled context matched the inlined location, so the cloned call is
  // not a profiled allocation; strip both metadata kinds from it.
  if (NewMIBList.empty()) {
    removeMemProfMetadata(Call: ClonedCall);
    removeCallsiteMetadata(Call: ClonedCall);
    return;
  }
  // Only a strict subset of contexts matched: rebuild the clone's memprof
  // metadata from that subset. (If all matched, the existing MD is kept.)
  if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
    updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE);
}
897
898// Update memprof related metadata (!memprof and !callsite) based on the
899// inlining of Callee into the callsite at CB. The updates include merging the
900// inlined callee's callsite metadata with that of the inlined call,
901// and moving the subset of any memprof contexts to the inlined callee
902// allocations if they match the new inlined call stack.
903static void
904propagateMemProfMetadata(Function *Callee, CallBase &CB,
905 bool ContainsMemProfMetadata,
906 const ValueMap<const Value *, WeakTrackingVH> &VMap,
907 OptimizationRemarkEmitter *ORE) {
908 MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite);
909 // Only need to update if the inlined callsite had callsite metadata, or if
910 // there was any memprof metadata inlined.
911 if (!CallsiteMD && !ContainsMemProfMetadata)
912 return;
913
914 // Propagate metadata onto the cloned calls in the inlined callee.
915 for (const auto &Entry : VMap) {
916 // See if this is a call that has been inlined and remapped, and not
917 // simplified away in the process.
918 auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first);
919 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second);
920 if (!OrigCall || !ClonedCall)
921 continue;
922 // If the inlined callsite did not have any callsite metadata, then it isn't
923 // involved in any profiled call contexts, and we can remove any memprof
924 // metadata on the cloned call.
925 if (!CallsiteMD) {
926 removeMemProfMetadata(Call: ClonedCall);
927 removeCallsiteMetadata(Call: ClonedCall);
928 continue;
929 }
930 propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE);
931 }
932}
933
/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias);
  // Fast path: the call site carries none of the four metadata kinds.
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        // NOTE(review): the concatenation is stored back into the loop-carried
        // MemParallelLoopAccess variable, so metadata picked up from one
        // instruction is also applied to instructions visited after it.
        MemParallelLoopAccess = MDNode::concatenate(
            A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access),
            B: MemParallelLoopAccess);
        I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access,
                      Node: MemParallelLoopAccess);
      }

      // For the remaining kinds, merge the call site's metadata into whatever
      // the cloned instruction already carries.
      if (AccessGroup)
        I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups(
            AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup));

      if (AliasScope)
        I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate(
            A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope));

      if (NoAlias)
        I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate(
            A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias));
    }
  }
}
976
977/// Track inlining chain via inlined.from metadata for dontcall diagnostics.
978static void PropagateInlinedFromMetadata(CallBase &CB, StringRef CalledFuncName,
979 StringRef CallerFuncName,
980 Function::iterator FStart,
981 Function::iterator FEnd) {
982 LLVMContext &Ctx = CB.getContext();
983 uint64_t InlineSiteLoc = 0;
984 if (auto *MD = CB.getMetadata(Kind: "srcloc"))
985 if (auto *CI = mdconst::dyn_extract<ConstantInt>(MD: MD->getOperand(I: 0)))
986 InlineSiteLoc = CI->getZExtValue();
987
988 auto *I64Ty = Type::getInt64Ty(C&: Ctx);
989 auto MakeMDInt = [&](uint64_t V) {
990 return ConstantAsMetadata::get(C: ConstantInt::get(Ty: I64Ty, V));
991 };
992
993 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
994 for (Instruction &I : BB) {
995 auto *CI = dyn_cast<CallInst>(Val: &I);
996 if (!CI || !CI->getMetadata(Kind: "srcloc"))
997 continue;
998 auto *Callee = CI->getCalledFunction();
999 if (!Callee || (!Callee->hasFnAttribute(Kind: "dontcall-error") &&
1000 !Callee->hasFnAttribute(Kind: "dontcall-warn")))
1001 continue;
1002
1003 SmallVector<Metadata *, 8> Ops;
1004 if (MDNode *Existing = CI->getMetadata(Kind: "inlined.from"))
1005 append_range(C&: Ops, R: Existing->operands());
1006 else {
1007 Ops.push_back(Elt: MDString::get(Context&: Ctx, Str: CalledFuncName));
1008 Ops.push_back(Elt: MakeMDInt(0));
1009 }
1010 Ops.push_back(Elt: MDString::get(Context&: Ctx, Str: CallerFuncName));
1011 Ops.push_back(Elt: MakeMDInt(InlineSiteLoc));
1012 CI->setMetadata(Kind: "inlined.from", Node: MDNode::get(Context&: Ctx, MDs: Ops));
1013 }
1014 }
1015}
1016
1017/// Bundle operands of the inlined function must be added to inlined call sites.
1018static void PropagateOperandBundles(Function::iterator InlinedBB,
1019 Instruction *CallSiteEHPad) {
1020 for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) {
1021 CallBase *I = dyn_cast<CallBase>(Val: &II);
1022 if (!I)
1023 continue;
1024 // Skip call sites which already have a "funclet" bundle.
1025 if (I->getOperandBundle(ID: LLVMContext::OB_funclet))
1026 continue;
1027 // Skip call sites which are nounwind intrinsics (as long as they don't
1028 // lower into regular function calls in the course of IR transformations).
1029 auto *CalledFn =
1030 dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts());
1031 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
1032 !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID()))
1033 continue;
1034
1035 SmallVector<OperandBundleDef, 1> OpBundles;
1036 I->getOperandBundlesAsDefs(Defs&: OpBundles);
1037 OpBundles.emplace_back(Args: "funclet", Args&: CallSiteEHPad);
1038
1039 Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator());
1040 NewInst->takeName(V: I);
1041 I->replaceAllUsesWith(V: NewInst);
1042 I->eraseFromParent();
1043 }
1044}
1045
namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  /// All scoped-alias metadata nodes found in the function, including nodes
  /// reachable transitively through their operands.
  SetVector<const MDNode *> MD;
  /// Mapping from an original node to its clone; populated by clone().
  MetadataMap MDMap;
  /// Extend MD with every MDNode reachable from the nodes collected so far.
  void addRecursiveMetadataUses();

public:
  /// Collect the scoped alias metadata used by instructions in \p F.
  ScopedAliasMetadataDeepCloner(const Function *F);

  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};
} // namespace
1069
1070ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1071 const Function *F) {
1072 for (const BasicBlock &BB : *F) {
1073 for (const Instruction &I : BB) {
1074 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1075 MD.insert(X: M);
1076 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1077 MD.insert(X: M);
1078
1079 // We also need to clone the metadata in noalias intrinsics.
1080 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1081 MD.insert(X: Decl->getScopeList());
1082 }
1083 }
1084 addRecursiveMetadataUses();
1085}
1086
1087void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1088 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1089 while (!Queue.empty()) {
1090 const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val());
1091 for (const Metadata *Op : M->operands())
1092 if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op))
1093 if (MD.insert(X: OpMD))
1094 Queue.push_back(Elt: OpMD);
1095 }
1096}
1097
void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  // First map every collected node to a temporary placeholder tuple, so that
  // references between nodes (including cycles and forward references) can be
  // resolved in the second pass.
  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {}));
    MDMap[I].reset(MD: DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      // MDNode operands map to their clone (or placeholder); any other
      // metadata (e.g. strings) is shared as-is.
      if (const MDNode *M = dyn_cast<MDNode>(Val: Op))
        NewOps.push_back(Elt: MDMap[M]);
      else
        NewOps.push_back(Elt: const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps);
    MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    // Redirect all references to the placeholder (including those inside
    // clones built earlier in this loop, via the TrackingMDNodeRef map) to
    // the final node. The temporaries are owned by DummyNodes and released
    // when this function returns.
    TempM->replaceAllUsesWith(MD: NewM);
    NewOps.clear();
  }
}
1127
1128void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1129 Function::iterator FEnd) {
1130 if (MDMap.empty())
1131 return; // Nothing to do.
1132
1133 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1134 for (Instruction &I : BB) {
1135 // TODO: The null checks for the MDMap.lookup() results should no longer
1136 // be necessary.
1137 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1138 if (MDNode *MNew = MDMap.lookup(Val: M))
1139 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew);
1140
1141 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1142 if (MDNode *MNew = MDMap.lookup(Val: M))
1143 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew);
1144
1145 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1146 if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList()))
1147 Decl->setScopeList(MNew);
1148 }
1149 }
1150}
1151
/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
///
/// \p VMap maps callee values to their inlined clones; only mapped
/// instructions that were not simplified away are annotated. \p CalleeAAR,
/// if non-null, refines the treatment of calls via their memory effects.
/// NOTE(review): \p DL is not referenced by this implementation.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) {
  if (!EnableNoAliasConversion)
    return;

  // Collect the noalias arguments that are actually used in the callee.
  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(Elt: &Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(Func&: const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    // Build a human-readable scope name from the function and argument, to
    // make the resulting metadata easier to attribute when debugging.
    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(X: i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
    NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second);
      if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI))
        continue;

      // Gather the pointer operands this instruction accesses memory through.
      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
        PtrArgs.push_back(Elt: LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
        PtrArgs.push_back(Elt: SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I))
        PtrArgs.push_back(Elt: VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I))
        PtrArgs.push_back(Elt: CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I))
        PtrArgs.push_back(Elt: RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(Val: I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);

          // We'll retain this knowledge without additional metadata.
          if (ME.onlyAccessesInaccessibleMem())
            continue;

          if (ME.onlyAccessesArgPointees())
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // Only care about pointer arguments. If a noalias argument is
          // accessed through a non-pointer argument, it must be captured
          // first (e.g. via ptrtoint), and we protect against captures below.
          if (!Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Elt: Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        getUnderlyingObjects(V, Objects, /* LI = */ nullptr);

        ObjSet.insert_range(R&: Objects);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
           UsesUnknownObject = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) ||
                             isa<ConstantPointerNull>(Val: V) ||
                             isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(Val: V)) {
          if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        if (isEscapeSource(V)) {
          // An escape source can only alias with a noalias argument if it has
          // been captured beforehand.
          RequiresNoCaptureBefore = true;
        } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) {
          // If this is neither an escape source, nor some identified object
          // (which cannot directly alias a noalias argument), nor some other
          // argument (which, by definition, also cannot alias a noalias
          // argument), conservatively do not make any assumptions.
          UsesUnknownObject = true;
        }
      }

      // Nothing we can do if the used underlying object cannot be reliably
      // determined.
      if (UsesUnknownObject)
        continue;

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        RequiresNoCaptureBefore = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias set, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (ObjSet.contains(Ptr: A))
          continue; // May be based on a noalias argument.

        // It might be tempting to skip the PointerMayBeCapturedBefore check if
        // A->hasNoCaptureAttr() is true, but this is incorrect because
        // nocapture only guarantees that no copies outlive the function, not
        // that the value cannot be locally captured.
        if (!RequiresNoCaptureBefore ||
            !capturesAnything(CC: PointerMayBeCapturedBefore(
                V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false,
                Mask: CaptureComponents::Provenance)))
          NoAliases.push_back(Elt: NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(KindID: LLVMContext::MD_noalias,
                        Node: MDNode::concatenate(
                            A: NI->getMetadata(KindID: LLVMContext::MD_noalias),
                            B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(Ptr: A))
            Scopes.push_back(Elt: NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            KindID: LLVMContext::MD_alias_scope,
            Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope),
                                B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes)));
    }
  }
}
1398
1399static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1400 ReturnInst *End) {
1401
1402 assert(Begin->getParent() == End->getParent() &&
1403 "Expected to be in same basic block!");
1404 auto BeginIt = Begin->getIterator();
1405 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1406 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1407 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1408}
1409
1410// Add attributes from CB params and Fn attributes that can always be propagated
1411// to the corresponding argument / inner callbases.
1412static void AddParamAndFnBasicAttributes(const CallBase &CB,
1413 ValueToValueMapTy &VMap,
1414 ClonedCodeInfo &InlinedFunctionInfo) {
1415 auto *CalledFunction = CB.getCalledFunction();
1416 auto &Context = CalledFunction->getContext();
1417
1418 // Collect valid attributes for all params.
1419 SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
1420 bool HasAttrToPropagate = false;
1421
1422 // Attributes we can only propagate if the exact parameter is forwarded.
1423 // We can propagate both poison generating and UB generating attributes
1424 // without any extra checks. The only attribute that is tricky to propagate
1425 // is `noundef` (skipped for now) as that can create new UB where previous
1426 // behavior was just using a poison value.
1427 static const Attribute::AttrKind ExactAttrsToPropagate[] = {
1428 Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
1429 Attribute::NonNull, Attribute::NoFPClass,
1430 Attribute::Alignment, Attribute::Range};
1431
1432 for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1433 ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1434 ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1435 // Access attributes can be propagated to any param with the same underlying
1436 // object as the argument.
1437 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone))
1438 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone);
1439 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly))
1440 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly);
1441
1442 for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
1443 Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK);
1444 if (Attr.isValid())
1445 ValidExactParamAttrs.back().addAttribute(A: Attr);
1446 }
1447
1448 HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
1449 HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
1450 }
1451
1452 // Won't be able to propagate anything.
1453 if (!HasAttrToPropagate)
1454 return;
1455
1456 for (BasicBlock &BB : *CalledFunction) {
1457 for (Instruction &Ins : BB) {
1458 const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins);
1459 if (!InnerCB)
1460 continue;
1461 auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB));
1462 if (!NewInnerCB)
1463 continue;
1464 // The InnerCB might have be simplified during the inlining
1465 // process which can make propagation incorrect.
1466 if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB))
1467 continue;
1468
1469 AttributeList AL = NewInnerCB->getAttributes();
1470 for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1471 // It's unsound or requires special handling to propagate
1472 // attributes to byval arguments. Even if CalledFunction
1473 // doesn't e.g. write to the argument (readonly), the call to
1474 // NewInnerCB may write to its by-value copy.
1475 if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal))
1476 continue;
1477
1478 // Don't bother propagating attrs to constants.
1479 if (match(V: NewInnerCB->getArgOperand(i: I),
1480 P: llvm::PatternMatch::m_ImmConstant()))
1481 continue;
1482
1483 // Check if the underlying value for the parameter is an argument.
1484 const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I));
1485 unsigned ArgNo;
1486 if (Arg) {
1487 ArgNo = Arg->getArgNo();
1488 // For dereferenceable, dereferenceable_or_null, align, etc...
1489 // we don't want to propagate if the existing param has the same
1490 // attribute with "better" constraints. So remove from the
1491 // new AL if the region of the existing param is larger than
1492 // what we can propagate.
1493 AttrBuilder NewAB{
1494 Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])};
1495 if (AL.getParamDereferenceableBytes(Index: I) >
1496 NewAB.getDereferenceableBytes())
1497 NewAB.removeAttribute(Val: Attribute::Dereferenceable);
1498 if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) >
1499 NewAB.getDereferenceableOrNullBytes())
1500 NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1501 if (AL.getParamAlignment(ArgNo: I).valueOrOne() >
1502 NewAB.getAlignment().valueOrOne())
1503 NewAB.removeAttribute(Val: Attribute::Alignment);
1504 if (auto ExistingRange = AL.getParamRange(ArgNo: I)) {
1505 if (auto NewRange = NewAB.getRange()) {
1506 ConstantRange CombinedRange =
1507 ExistingRange->intersectWith(CR: *NewRange);
1508 NewAB.removeAttribute(Val: Attribute::Range);
1509 NewAB.addRangeAttr(CR: CombinedRange);
1510 }
1511 }
1512
1513 if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I))
1514 NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass());
1515
1516 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB);
1517 } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) {
1518 // Check if the underlying value for the parameter is an argument.
1519 const Value *UnderlyingV =
1520 getUnderlyingObject(V: InnerCB->getArgOperand(i: I));
1521 Arg = dyn_cast<Argument>(Val: UnderlyingV);
1522 if (!Arg)
1523 continue;
1524 ArgNo = Arg->getArgNo();
1525 } else {
1526 continue;
1527 }
1528
1529 // If so, propagate its access attributes.
1530 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]);
1531
1532 // We can have conflicting attributes from the inner callsite and
1533 // to-be-inlined callsite. In that case, choose the most
1534 // restrictive.
1535
1536 // readonly + writeonly means we can never deref so make readnone.
1537 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) &&
1538 AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly))
1539 AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone);
1540
1541 // If have readnone, need to clear readonly/writeonly
1542 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) {
1543 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly);
1544 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly);
1545 }
1546
1547 // Writable cannot exist in conjunction w/ readonly/readnone
1548 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) ||
1549 AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone))
1550 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable);
1551 }
1552 NewInnerCB->setAttributes(AL);
1553 }
1554 }
1555}
1556
1557// Only allow these whitelisted attributes to be propagated back to the
1558// callee. This is because other attributes may only be valid on the call
1559// itself, i.e. attributes such as signext and zeroext.
1560
1561// Attributes that are always okay to propagate, because violating them
1562// results in immediate UB.
1563static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1564 AttrBuilder Valid(CB.getContext());
1565 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1566 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1567 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1568 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1569 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1570 Valid.addAttribute(Val: Attribute::NoAlias);
1571 if (CB.hasRetAttr(Kind: Attribute::NoUndef))
1572 Valid.addAttribute(Val: Attribute::NoUndef);
1573 return Valid;
1574}
1575
1576// Attributes that need additional checks as propagating them may change
1577// behavior or cause new UB.
1578static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1579 AttrBuilder Valid(CB.getContext());
1580 if (CB.hasRetAttr(Kind: Attribute::NonNull))
1581 Valid.addAttribute(Val: Attribute::NonNull);
1582 if (CB.hasRetAttr(Kind: Attribute::Alignment))
1583 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1584 if (std::optional<ConstantRange> Range = CB.getRange())
1585 Valid.addRangeAttr(CR: *Range);
1586 if (CB.hasRetAttr(Kind: Attribute::NoFPClass))
1587 Valid.addNoFPClassAttr(NoFPClassMask: CB.getRetNoFPClass());
1588 return Valid;
1589}
1590
// Propagate valid return attributes from the call site \p CB onto the cloned
// calls (looked up through \p VMap) whose results are returned by the inlined
// callee, when doing so provably cannot introduce new UB or poison.
1591static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1592 ClonedCodeInfo &InlinedFunctionInfo) {
1593 AttrBuilder CallSiteValidUB = IdentifyValidUBGeneratingAttributes(CB);
1594 AttrBuilder CallSiteValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1595 if (!CallSiteValidUB.hasAttributes() && !CallSiteValidPG.hasAttributes())
1596 return;
1597 auto *CalledFunction = CB.getCalledFunction();
1598 auto &Context = CalledFunction->getContext();
1599
1600 for (auto &BB : *CalledFunction) {
1601 auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
1602 if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
1603 continue;
1604 auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
1605 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1606 // add the attributes on the cloned RetVal. Simplification during inlining
1607 // could have transformed the cloned instruction.
1608 auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal))
1609 if (!NewRetVal)
1610 continue;
1611
1612 // The RetVal might have been simplified during the inlining
1613 // process which can make propagation incorrect.
1614 if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal))
1615 continue;
1616 // Backward propagation of attributes to the returned value may be incorrect
1617 // if it is control flow dependent.
1618 // Consider:
1619 // @callee {
1620 // %rv = call @foo()
1621 // %rv2 = call @bar()
1622 // if (%rv2 != null)
1623 // return %rv2
1624 // if (%rv == null)
1625 // exit()
1626 // return %rv
1627 // }
1628 // caller() {
1629 // %val = call nonnull @callee()
1630 // }
1631 // Here we cannot add the nonnull attribute on either foo or bar. So, we
1632 // limit the check to both RetVal and RI are in the same basic block and
1633 // there are no throwing/exiting instructions between these instructions.
1634 if (RI->getParent() != RetVal->getParent() ||
1635 MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
1636 continue;
1637 // Add to the existing attributes of NewRetVal, i.e. the cloned call
1638 // instruction.
1639 // NB! When we have the same attribute already existing on NewRetVal, but
1640 // with a differing value, the AttributeList's merge API honours the already
1641 // existing attribute value (i.e. attributes such as dereferenceable,
1642 // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1643 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1644 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1645 AttributeList AL = NewRetVal->getAttributes();
1646 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1647 ValidUB.removeAttribute(Val: Attribute::Dereferenceable)
1648 if (ValidUB.getDereferenceableOrNullBytes() <
1649 AL.getRetDereferenceableOrNullBytes())
1650 ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1651 AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
1652 // Attributes that may generate poison returns are a bit tricky. If we
1653 // propagate them, other uses of the callsite might have their behavior
1654 // change or cause UB (if they have noundef) b.c of the new potential
1655 // poison.
1656 // Take the following three cases:
1657 //
1658 // 1)
1659 // define nonnull ptr @foo() {
1660 // %p = call ptr @bar()
1661 // call void @use(ptr %p) willreturn nounwind
1662 // ret ptr %p
1663 // }
1664 //
1665 // 2)
1666 // define noundef nonnull ptr @foo() {
1667 // %p = call ptr @bar()
1668 // call void @use(ptr %p) willreturn nounwind
1669 // ret ptr %p
1670 // }
1671 //
1672 // 3)
1673 // define nonnull ptr @foo() {
1674 // %p = call noundef ptr @bar()
1675 // ret ptr %p
1676 // }
1677 //
1678 // In case 1, we can't propagate nonnull because poison value in @use may
1679 // change behavior or trigger UB.
1680 // In case 2, we don't need to be concerned about propagating nonnull, as
1681 // any new poison at @use will trigger UB anyways.
1682 // In case 3, we can never propagate nonnull because it may create UB due to
1683 // the noundef on @bar.
1684 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1685 ValidPG.removeAttribute(Val: Attribute::Alignment);
1686 if (ValidPG.hasAttributes()) {
1687 Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range);
1688 if (CBRange.isValid()) {
1689 Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range);
1690 if (NewRange.isValid()) {
1691 ValidPG.addRangeAttr(
1692 CR: CBRange.getRange().intersectWith(CR: NewRange.getRange()));
1693 }
1694 }
1695
1696 Attribute CBNoFPClass = ValidPG.getAttribute(Kind: Attribute::NoFPClass);
1697 if (CBNoFPClass.isValid() && AL.hasRetAttr(Kind: Attribute::NoFPClass)) {
1698 ValidPG.addNoFPClassAttr(
1699 NoFPClassMask: CBNoFPClass.getNoFPClass() |
1700 AL.getRetAttr(Kind: Attribute::NoFPClass).getNoFPClass());
1701 }
1702
1703 // Three checks.
1704 // If the callsite has `noundef`, then a poison due to violating the
1705 // return attribute will create UB anyways so we can always propagate.
1706 // Otherwise, if the return value (callee to be inlined) has `noundef`, we
1707 // can't propagate as a new poison return will cause UB.
1708 // Finally, check if the return value has no uses whose behavior may
1709 // change/may cause UB if we potentially return poison. At the moment this
1710 // is implemented overly conservatively with a single-use check.
1711 // TODO: Update the single-use check to iterate through uses and only bail
1712 // if we have a potentially dangerous use.
1713
1714 if (CB.hasRetAttr(Kind: Attribute::NoUndef) ||
1715 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef)))
1716 NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
1717 }
1718 NewRetVal->setAttributes(NewAL);
1719 }
1720}
1721
1722/// If the inlined function has non-byval align arguments, then
1723/// add @llvm.assume-based alignment assumptions to preserve this information.
1724static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1725 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1726 return;
1727
1728 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1729 auto &DL = CB.getDataLayout();
1730
1731 // To avoid inserting redundant assumptions, we should check for assumptions
1732 // already in the caller. To do this, we might need a DT of the caller.
1733 DominatorTree DT;
1734 bool DTCalculated = false;
1735
1736 Function *CalledFunc = CB.getCalledFunction();
1737 for (Argument &Arg : CalledFunc->args()) {
1738 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1739 Arg.use_empty())
1740 continue;
1741 MaybeAlign Alignment = Arg.getParamAlign();
1742 if (!Alignment)
1743 continue;
1744
1745 if (!DTCalculated) {
1746 DT.recalculate(Func&: *CB.getCaller());
1747 DTCalculated = true;
1748 }
1749 // If we can already prove the asserted alignment in the context of the
1750 // caller, then don't bother inserting the assumption.
1751 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1752 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1753 continue;
1754
1755 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1756 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1757 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1758 }
1759}
1760
1761static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1762 MaybeAlign SrcAlign, Module *M,
1763 BasicBlock *InsertBlock,
1764 InlineFunctionInfo &IFI,
1765 Function *CalledFunc) {
1766 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1767
1768 Value *Size =
1769 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1770
1771 Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout());
1772
1773 // Generate a memcpy with the correct alignments.
1774 CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
1775
1776 // The verifier requires that all calls of debug-info-bearing functions
1777 // from debug-info-bearing functions have a debug location (for inlining
1778 // purposes). Assign a dummy location to satisfy the constraint.
1779 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1780 if (DISubprogram *SP = CalledFunc->getSubprogram())
1781 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1782}
1783
1784/// When inlining a call site that has a byval argument,
1785/// we have to make the implicit memcpy explicit by adding it.
/// Returns the value to use in place of \p Arg inside the inlined body:
/// either \p Arg itself (when the copy can be elided) or a new,
/// sufficiently-aligned alloca in the caller's entry block.
1786static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1787 Instruction *TheCall,
1788 const Function *CalledFunc,
1789 InlineFunctionInfo &IFI,
1790 MaybeAlign ByValAlignment) {
1791 Function *Caller = TheCall->getFunction();
1792 const DataLayout &DL = Caller->getDataLayout();
1793
1794 // If the called function is readonly, then it could not mutate the caller's
1795 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1796 // temporary.
1797 if (CalledFunc->onlyReadsMemory()) {
1798 // If the byval argument has a specified alignment that is greater than the
1799 // passed in pointer, then we either have to round up the input pointer or
1800 // give up on this transformation.
1801 if (ByValAlignment.valueOrOne() == 1)
1802 return Arg;
1803
1804 AssumptionCache *AC =
1805 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1806
1807 // If the pointer is already known to be sufficiently aligned, or if we can
1808 // round it up to a larger alignment, then we don't need a temporary.
1809 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1810 *ByValAlignment)
1811 return Arg;
1812
1813 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1814 // for code quality, but rarely happens and is required for correctness.
1815 }
1816
1817 // Create the alloca. If we have DataLayout, use nice alignment.
1818 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1819
1820 // If the byval had an alignment specified, we *must* use at least that
1821 // alignment, as it is required by the byval argument (and uses of the
1822 // pointer inside the callee).
1823 if (ByValAlignment)
1824 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1825
1826 AllocaInst *NewAlloca =
1827 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1828 nullptr, Alignment, Arg->getName());
1829 NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated());
1830 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1831 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1832
1833 // Uses of the argument in the function should use our new alloca
1834 // instead.
1835 return NewAlloca;
1836}
1837
1838// Check whether this Value is used by a lifetime intrinsic.
1839static bool isUsedByLifetimeMarker(Value *V) {
1840 for (User *U : V->users())
1841 if (isa<LifetimeIntrinsic>(Val: U))
1842 return true;
1843 return false;
1844}
1845
1846// Check whether the given alloca already has
1847// lifetime.start or lifetime.end intrinsics.
1848static bool hasLifetimeMarkers(AllocaInst *AI) {
1849 Type *Ty = AI->getType();
1850 Type *Int8PtrTy =
1851 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1852 if (Ty == Int8PtrTy)
1853 return isUsedByLifetimeMarker(V: AI);
1854
1855 // Do a scan to find all the casts to i8*.
1856 for (User *U : AI->users()) {
1857 if (U->getType() != Int8PtrTy) continue;
1858 if (U->stripPointerCasts() != AI) continue;
1859 if (isUsedByLifetimeMarker(V: U))
1860 return true;
1861 }
1862 return false;
1863}
1864
1865/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1866/// block. Allocas used in inalloca calls and allocas of dynamic array size
1867/// cannot be static.
1868static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
1869 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1870}
1871
1872/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1873/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1874static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1875 LLVMContext &Ctx,
1876 DenseMap<const MDNode *, MDNode *> &IANodes) {
1877 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1878 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1879 Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(),
1880 AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank());
1881}
1882
1883/// Update inlined instructions' line numbers to encode the location where
1884/// these instructions are inlined.
1885static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1886 Instruction *TheCall, bool CalleeHasDebugInfo) {
1887 if (!TheCall->getDebugLoc())
1888 return;
1889
1890 // Don't propagate the source location atom from the call to inlined nodebug
1891 // instructions, and avoid putting it in the InlinedAt field of inlined
1892 // not-nodebug instructions. FIXME: Possibly worth transferring/generating
1893 // an atom for the returned value, otherwise we miss stepping on inlined
1894 // nodebug functions (which is different to existing behaviour).
1895 DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom();
1896
1897 auto &Ctx = Fn->getContext();
1898 DILocation *InlinedAtNode = TheCallDL;
1899
1900 // Create a unique call site, not to be confused with any other call from the
1901 // same location.
1902 InlinedAtNode = DILocation::getDistinct(
1903 Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
1904 Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());
1905
1906 // Cache the inlined-at nodes as they're built so they are reused, without
1907 // this every instruction's inlined-at chain would become distinct from each
1908 // other.
1909 DenseMap<const MDNode *, MDNode *> IANodes;
1910
1911 // Check if we are not generating inline line tables and want to use
1912 // the call site location instead.
1913 bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1914
1915 // Helper-util for updating the metadata attached to an instruction.
1916 auto UpdateInst = [&](Instruction &I) {
1917 // Loop metadata needs to be updated so that the start and end locs
1918 // reference inlined-at locations.
1919 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1920 &IANodes](Metadata *MD) -> Metadata * {
1921 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
1922 return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
1923 return MD;
1924 };
1925 updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);
1926
1927 if (!NoInlineLineTables)
1928 if (DebugLoc DL = I.getDebugLoc()) {
1929 DebugLoc IDL =
1930 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
1931 I.setDebugLoc(IDL);
1932 return;
1933 }
1934
1935 if (CalleeHasDebugInfo && !NoInlineLineTables)
1936 return;
1937
1938 // If the inlined instruction has no line number, or if inline info
1939 // is not being generated, make it look as if it originates from the call
1940 // location. This is important for ((__always_inline, __nodebug__))
1941 // functions which must use caller location for all instructions in their
1942 // function body.
1943
1944 // Don't update static allocas, as they may get moved later.
1945 if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
1946 if (allocaWouldBeStaticInEntry(AI))
1947 return;
1948
1949 // Do not force a debug loc for pseudo probes, since they do not need to
1950 // be debuggable, and also they are expected to have a zero/null dwarf
1951 // discriminator at this point which could be violated otherwise.
1952 if (isa<PseudoProbeInst>(Val: I))
1953 return;
1954
1955 I.setDebugLoc(TheCallDL);
1956 };
1957
1958 // Helper-util for updating debug-info records attached to instructions.
1959 auto UpdateDVR = [&](DbgRecord *DVR) {
1960 assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1961 if (NoInlineLineTables) {
1962 DVR->setDebugLoc(TheCallDL);
1963 return;
1964 }
1965 DebugLoc DL = DVR->getDebugLoc();
1966 DebugLoc IDL =
1967 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
1968 Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
1969 DVR->setDebugLoc(IDL);
1970 };
1971
1972 // Iterate over all instructions, updating metadata and debug-info records.
1973 for (; FI != Fn->end(); ++FI) {
1974 for (Instruction &I : *FI) {
1975 UpdateInst(I);
1976 for (DbgRecord &DVR : I.getDbgRecordRange()) {
1977 UpdateDVR(&DVR);
1978 }
1979 }
1980
1981 // Remove debug info records if we're not keeping inline info.
1982 if (NoInlineLineTables) {
1983 BasicBlock::iterator BI = FI->begin();
1984 while (BI != FI->end()) {
1985 BI->dropDbgRecords();
1986 ++BI;
1987 }
1988 }
1989 }
1990}
1991
1992#undef DEBUG_TYPE
1993#define DEBUG_TYPE "assignment-tracking"
1994/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
/// Returns a map from each escaped base storage (caller-local alloca) to the
/// set of local variables whose assignments target it.
1995static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1996 const CallBase &CB) {
1997 at::StorageToVarsMap EscapedLocals;
1998 SmallPtrSet<const Value *, 4> SeenBases;
1999
2000 LLVM_DEBUG(
2001 errs() << "# Finding caller local variables escaped by callee\n");
2002 for (const Value *Arg : CB.args()) {
2003 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
2004 if (!Arg->getType()->isPointerTy()) {
2005 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
2006 continue;
2007 }
2008
2009 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
2010 if (!I) {
2011 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
2012 continue;
2013 }
2014
2015 // Walk back to the base storage.
2016 assert(Arg->getType()->isPtrOrPtrVectorTy());
2017 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
2018 const AllocaInst *Base = dyn_cast<AllocaInst>(
2019 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
2020 if (!Base) {
2021 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
2022 continue;
2023 }
2024
2025 assert(Base);
2026 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
2027 // We only need to process each base address once - skip any duplicates.
2028 if (!SeenBases.insert(Ptr: Base).second)
2029 continue;
2030
2031 // Find all local variables associated with the backing storage.
2032 auto CollectAssignsForStorage = [&](DbgVariableRecord *DbgAssign) {
2033 // Skip variables from inlined functions - they are not local variables.
2034 if (DbgAssign->getDebugLoc().getInlinedAt())
2035 return;
2036 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
2037 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
2038 };
2039 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
2040 }
2041 return EscapedLocals;
2042}
2043
2044static void trackInlinedStores(Function::iterator Start, Function::iterator End,
2045 const CallBase &CB) {
2046 LLVM_DEBUG(errs() << "trackInlinedStores into "
2047 << Start->getParent()->getName() << " from "
2048 << CB.getCalledFunction()->getName() << "\n");
2049 const DataLayout &DL = CB.getDataLayout();
2050 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL);
2051}
2052
2053/// Update inlined instructions' DIAssignID metadata. We need to do this
2054/// otherwise a function inlined more than once into the same function
2055/// will cause DIAssignID to be shared by many instructions.
2056static void fixupAssignments(Function::iterator Start, Function::iterator End) {
2057 DenseMap<DIAssignID *, DIAssignID *> Map;
2058 // Loop over all the inlined instructions. If we find a DIAssignID
2059 // attachment or use, replace it with a new version.
2060 for (auto BBI = Start; BBI != End; ++BBI) {
2061 for (Instruction &I : *BBI)
2062 at::remapAssignID(Map, I);
2063 }
2064}
2065#undef DEBUG_TYPE
2066#define DEBUG_TYPE "inline-function"
2067
2068/// Update the block frequencies of the caller after a callee has been inlined.
2069///
2070/// Each block cloned into the caller has its block frequency scaled by the
2071/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
2072/// callee's entry block gets the same frequency as the callsite block and the
2073/// relative frequencies of all cloned blocks remain the same after cloning.
2074static void updateCallerBFI(BasicBlock *CallSiteBlock,
2075 const ValueToValueMapTy &VMap,
2076 BlockFrequencyInfo *CallerBFI,
2077 BlockFrequencyInfo *CalleeBFI,
2078 const BasicBlock &CalleeEntryBlock) {
2079 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
2080 for (auto Entry : VMap) {
2081 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
2082 continue;
2083 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
2084 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
2085 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
2086 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
2087 // Multiple blocks in the callee might get mapped to one cloned block in
2088 // the caller since we prune the callee as we clone it. When that happens,
2089 // we want to use the maximum among the original blocks' frequencies.
2090 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
2091 if (NewFreq > Freq)
2092 Freq = NewFreq;
2093 }
2094 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
2095 }
// Pin the cloned entry block to the callsite block's frequency and rescale
// every other cloned block relative to it.
2096 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
2097 CallerBFI->setBlockFreqAndScale(
2098 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
2099}
2100
2101/// Update the branch metadata for cloned call instructions.
2102static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
2103 const ProfileCount &CalleeEntryCount,
2104 const CallBase &TheCall, ProfileSummaryInfo *PSI,
2105 BlockFrequencyInfo *CallerBFI) {
2106 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
2107 return;
2108 auto CallSiteCount =
2109 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
2110 int64_t CallCount =
2111 std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount());
2112 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
2113}
2114
/// Adjust \p Callee's entry count by \p EntryDelta (typically negative after
/// an inline) and rescale profile metadata on call instructions accordingly:
/// on the cloned instructions in \p VMap when inlining, and on the remaining
/// callee body. \p VMap may be null when not called during inlining.
2115void llvm::updateProfileCallee(
2116 Function *Callee, int64_t EntryDelta,
2117 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
2118 auto CalleeCount = Callee->getEntryCount();
2119 if (!CalleeCount)
2120 return;
2121
2122 const uint64_t PriorEntryCount = CalleeCount->getCount();
2123
2124 // Since CallSiteCount is an estimate, it could exceed the original callee
2125 // count and has to be set to 0 so guard against underflow.
2126 const uint64_t NewEntryCount =
2127 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
2128 ? 0
2129 : PriorEntryCount + EntryDelta;
2130
// Rescale value-profile metadata on the vtable load feeding an indirect
// call, keeping it consistent with the call's own updated weights.
2131 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
2132 const uint64_t PriorEntryCount) {
2133 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
2134 if (VPtr)
2135 scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount);
2136 };
2137
2138 // During inlining ?
2139 if (VMap) {
2140 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
2141 for (auto Entry : *VMap) {
2142 if (isa<CallInst>(Val: Entry.first))
2143 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) {
2144 CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2145 updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
2146 }
2147
2148 if (isa<InvokeInst>(Val: Entry.first))
2149 if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) {
2150 II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2151 updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2152 }
2153 }
2154 }
2155
2156 if (EntryDelta) {
2157 Callee->setEntryCount(Count: NewEntryCount);
2158
2159 for (BasicBlock &BB : *Callee)
2160 // No need to update the callsite if it is pruned during inlining.
2161 if (!VMap || VMap->count(Val: &BB))
2162 for (Instruction &I : BB) {
2163 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2164 CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2165 updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2166 }
2167 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) {
2168 II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2169 updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2170 }
2171 }
2172 }
2173}
2174
2175/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2176/// result is implicitly consumed by a call to retainRV or claimRV immediately
2177/// after the call. This function inlines the retainRV/claimRV calls.
2178///
2179/// There are three cases to consider:
2180///
2181/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2182/// object in the callee return block, the autoreleaseRV call and the
2183/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2184/// is a claimRV call, a call to objc_release is emitted.
2185///
2186/// 2. If there is a call in the callee return block that doesn't have operand
2187/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2188/// is transferred to the call in the callee.
2189///
2190/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2191/// a retainRV call.
2192static void
2193inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2194 const SmallVectorImpl<ReturnInst *> &Returns) {
2195 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2196 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2197 IsUnsafeClaimRV = !IsRetainRV;
2198
// Process each return of the inlined callee independently; each may have
// its own autoreleaseRV/annotated-call pattern to pair with.
2199 for (auto *RI : Returns) {
2200 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
2201 bool InsertRetainCall = IsRetainRV;
2202 IRBuilder<> Builder(RI->getContext());
2203
2204 // Walk backwards through the basic block looking for either a matching
2205 // autoreleaseRV call or an unannotated call.
2206 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
2207 y: RI->getParent()->rend());
2208 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
2209 // Ignore casts.
2210 if (isa<CastInst>(Val: I))
2211 continue;
2212
2213 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
2214 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2215 !II->use_empty() ||
2216 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
2217 break;
2218
2219 // If we've found a matching authoreleaseRV call:
2220 // - If claimRV is attached to the call, insert a call to objc_release
2221 // and erase the autoreleaseRV call.
2222 // - If retainRV is attached to the call, just erase the autoreleaseRV
2223 // call.
2224 if (IsUnsafeClaimRV) {
2225 Builder.SetInsertPoint(II);
2226 Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd);
2227 }
2228 II->eraseFromParent();
2229 InsertRetainCall = false;
2230 break;
2231 }
2232
2233 auto *CI = dyn_cast<CallInst>(Val: &I);
2234
2235 if (!CI)
2236 break;
2237
2238 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
2239 objcarc::hasAttachedCallOpBundle(CB: CI))
2240 break;
2241
2242 // If we've found an unannotated call that defines RetOpnd, add a
2243 // "clang.arc.attachedcall" operand bundle.
2244 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2245 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2246 auto *NewCall = CallBase::addOperandBundle(
2247 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2248 NewCall->copyMetadata(SrcInst: *CI);
2249 CI->replaceAllUsesWith(V: NewCall);
2250 CI->eraseFromParent();
2251 InsertRetainCall = false;
2252 break;
2253 }
2254
2255 if (InsertRetainCall) {
2256 // The retainRV is attached to the call and we've failed to find a
2257 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2258 // to objc_retain.
2259 Builder.SetInsertPoint(RI);
2260 Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd);
2261 }
2262 }
2263}
2264
// In contextual profiling, when an inline succeeds, we want to remap the
// indices of the callee into the index space of the caller. We can't just leave
// them as-is because the same callee may appear in other places in this caller
// (other callsites), and its (callee's) counters and sub-contextual profile
// tree would be potentially different.
// Not all BBs of the callee may survive the opportunistic DCE InlineFunction
// does (same goes for callsites in the callee).
// We will return a pair of vectors, one for basic block IDs and one for
// callsites. For such a vector V, V[Idx] will be -1 if the callee
// instrumentation with index Idx did not survive inlining, and a new value
// otherwise.
// This function will update the caller's instrumentation intrinsics
// accordingly, mapping indices as described above. We also replace the "name"
// operand because we use it to distinguish between "own" instrumentation and
// "from callee" instrumentation when performing the traversal of the CFG of the
// caller. We traverse depth-first from the callsite's BB and up to the point we
// hit BBs owned by the caller.
// The return values will be then used to update the contextual
// profile. Note: we only update the "name" and "index" operands in the
// instrumentation intrinsics, we leave the hash and total nr of indices as-is,
// it's not worth updating those.
static std::pair<std::vector<int64_t>, std::vector<int64_t>>
remapIndices(Function &Caller, BasicBlock *StartBB,
             PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
             uint32_t CalleeCallsites) {
  // We'll allocate a new ID to imported callsite counters and callsites. We're
  // using -1 to indicate a counter we delete. Most likely the entry ID, for
  // example, will be deleted - we don't want 2 IDs in the same BB, and the
  // entry would have been cloned in the callsite's old BB.
  std::vector<int64_t> CalleeCounterMap;
  std::vector<int64_t> CalleeCallsiteMap;
  CalleeCounterMap.resize(new_size: CalleeCounters, x: -1);
  CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1);

  // Rewrite a counter-increment intrinsic inherited from the callee so it
  // points at the caller and at a freshly-allocated caller counter index.
  // Returns false (and does nothing) if the intrinsic is already the
  // caller's own. Lazily allocates the new index on first encounter, so
  // repeated clones of the same callee counter map to the same new index.
  auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
    if (Ins.getNameValue() == &Caller)
      return false;
    const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
    if (CalleeCounterMap[OldID] == -1)
      CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller);
    const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);

    Ins.setNameValue(&Caller);
    Ins.setIndex(NewID);
    return true;
  };

  // Same as above, but for callsite instrumentation intrinsics and the
  // callsite index space.
  auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool {
    if (Ins.getNameValue() == &Caller)
      return false;
    const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
    if (CalleeCallsiteMap[OldID] == -1)
      CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller);
    const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);

    Ins.setNameValue(&Caller);
    Ins.setIndex(NewID);
    return true;
  };

  std::deque<BasicBlock *> Worklist;
  DenseSet<const BasicBlock *> Seen;
  // We will traverse the BBs starting from the callsite BB. The callsite BB
  // will have at least a BB ID - maybe its own, and in any case the one coming
  // from the cloned function's entry BB. The other BBs we'll start seeing from
  // there on may or may not have BB IDs. BBs with IDs belonging to our caller
  // are definitely not coming from the imported function and form a boundary
  // past which we don't need to traverse anymore. BBs may have no
  // instrumentation (because we originally inserted instrumentation as per
  // MST), in which case we'll traverse past them. An invariant we'll keep is
  // that a BB will have at most 1 BB ID. For example, in the callsite BB, we
  // will delete the callee BB's instrumentation. This doesn't result in
  // information loss: the entry BB of the callee will have the same count as
  // the callsite's BB. At the end of this traversal, all the callee's
  // instrumentation would be mapped into the caller's instrumentation index
  // space. Some of the callee's counters may be deleted (as mentioned, this
  // should result in no loss of information).
  Worklist.push_back(x: StartBB);
  while (!Worklist.empty()) {
    auto *BB = Worklist.front();
    Worklist.pop_front();
    bool Changed = false;
    auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB);
    if (BBID) {
      Changed |= RewriteInstrIfNeeded(*BBID);
      // this may be the entryblock from the inlined callee, coming into a BB
      // that didn't have instrumentation because of MST decisions. Let's make
      // sure it's placed accordingly. This is a noop elsewhere.
      BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
    }
    for (auto &I : llvm::make_early_inc_range(Range&: *BB)) {
      if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) {
        if (isa<InstrProfIncrementInstStep>(Val: Inc)) {
          // Step instrumentation is used for select instructions. Inlining may
          // have propagated a constant resulting in the condition of the select
          // being resolved, case in which function cloning resolves the value
          // of the select, and elides the select instruction. If that is the
          // case, the step parameter of the instrumentation will reflect that.
          // We can delete the instrumentation in that case.
          if (isa<Constant>(Val: Inc->getStep())) {
            assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
            Inc->eraseFromParent();
          } else {
            assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
            RewriteInstrIfNeeded(*Inc);
          }
        } else if (Inc != BBID) {
          // If we're here it means that the BB had more than 1 IDs, presumably
          // some coming from the callee. We "made up our mind" to keep the
          // first one (which may or may not have been originally the caller's).
          // All the others are superfluous and we delete them.
          Inc->eraseFromParent();
          Changed = true;
        }
      } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) {
        Changed |= RewriteCallsiteInsIfNeeded(*CS);
      }
    }
    // Keep traversing only through BBs that either had no BB ID (we can't tell
    // whose they are; could still lead to callee blocks) or whose
    // instrumentation we just rewrote (i.e. they came from the callee). A BB
    // whose ID already belonged to the caller is a boundary — don't cross it.
    if (!BBID || Changed)
      for (auto *Succ : successors(BB))
        if (Seen.insert(V: Succ).second)
          Worklist.push_back(x: Succ);
  }

  assert(!llvm::is_contained(CalleeCounterMap, 0) &&
         "Counter index mapping should be either to -1 or to non-zero index, "
         "because the 0 "
         "index corresponds to the entry BB of the caller");
  assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
         "Callsite index mapping should be either to -1 or to non-zero index, "
         "because there should have been at least a callsite - the inlined one "
         "- which would have had a 0 index.");

  return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
}
2400
// Inline. If successful, update the contextual profile (if a valid one is
// given).
// The contextual profile data is organized in trees, as follows:
//  - each node corresponds to a function
//  - the root of each tree corresponds to an "entrypoint" - e.g.
//    RPC handler for server side
//  - the path from the root to a node is a particular call path
//  - the counters stored in a node are counter values observed in that
//    particular call path ("context")
//  - the edges between nodes are annotated with callsite IDs.
//
// Updating the contextual profile after an inlining means, at a high level,
// copying over the data of the callee, **intentionally without any value
// scaling**, and copying over the callees of the inlined callee.
llvm::InlineResult llvm::InlineFunction(
    CallBase &CB, InlineFunctionInfo &IFI, PGOContextualProfile &CtxProf,
    bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime,
    Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) {
  // Only specialized modules carry contextual profile data worth maintaining;
  // otherwise defer entirely to the plain overload.
  if (!CtxProf.isInSpecializedModule())
    return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
                          ForwardVarArgsTo, ORE);

  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();
  auto *StartBB = CB.getParent();

  // Get some preliminary data about the callsite before it might get inlined.
  // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to
  // get this data upfront and rely less on InlineFunction's behavior.
  const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee);
  auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
  const auto CallsiteID =
      static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());

  const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee);
  const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee);

  auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
                            ForwardVarArgsTo, ORE);
  if (!Ret.isSuccess())
    return Ret;

  // Inlining succeeded, we don't need the instrumentation of the inlined
  // callsite.
  CallsiteIDIns->eraseFromParent();

  // Assigning Maps and then capturing references into it in the lambda because
  // captured structured bindings are a C++20 extension. We do also need a
  // capture here, though.
  const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf,
                                        CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites);
  const uint32_t NewCountersSize = CtxProf.getNumCounters(F: Caller);

  // Visitor run over every context (tree node) of the caller: pull the
  // inlined callee's counters and sub-contexts into the caller's node, using
  // the index remapping computed above.
  auto Updater = [&](PGOCtxProfContext &Ctx) {
    assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
    const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps;
    assert(
        (Ctx.counters().size() +
             llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
         NewCountersSize) &&
        "The caller's counters size should have grown by the number of new "
        "distinct counters inherited from the inlined callee.");
    Ctx.resizeCounters(Size: NewCountersSize);
    // If the callsite wasn't exercised in this context, the value of the
    // counters coming from it is 0 - which it is right now, after resizing them
    // - and so we're done.
    auto CSIt = Ctx.callsites().find(x: CallsiteID);
    if (CSIt == Ctx.callsites().end())
      return;
    auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID);
    // The callsite was exercised, but not with this callee (so presumably this
    // is an indirect callsite). Again, we're done here.
    if (CalleeCtxIt == CSIt->second.end())
      return;

    // Let's pull in the counter values and the subcontexts coming from the
    // inlined callee.
    auto &CalleeCtx = CalleeCtxIt->second;
    assert(CalleeCtx.guid() == CalleeGUID);

    // Copy surviving counter values into their remapped slots; -1 entries
    // correspond to instrumentation deleted during inlining.
    for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
      const int64_t NewIndex = CalleeCounterMap[I];
      if (NewIndex >= 0) {
        assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 "
                                "index, that's the caller's entry BB");
        Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
      }
    }
    // Graft the callee's sub-context trees onto the caller under the
    // remapped callsite indices.
    for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
      const int64_t NewCSIdx = CalleeCallsiteMap[I];
      if (NewCSIdx >= 0) {
        assert(NewCSIdx != 0 &&
               "callsite index mapping shouldn't happen to a 0 index, the "
               "caller must've had at least one callsite (with such an index)");
        Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet));
      }
    }
    // We know the traversal is preorder, so it wouldn't have yet looked at the
    // sub-contexts of this context that it's currently visiting. Meaning, the
    // erase below invalidates no iterators.
    auto Deleted = Ctx.callsites().erase(x: CallsiteID);
    assert(Deleted);
    (void)Deleted;
  };
  CtxProf.update(Updater, F: Caller);
  return Ret;
}
2508
llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB,
                                           InlineFunctionInfo &IFI) {
  assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");

  // FIXME: we don't inline callbr yet.
  if (isa<CallBrInst>(Val: CB))
    return InlineResult::failure(Reason: "We don't inline callbr yet.");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();

  Function *CalledFunc = CB.getCalledFunction();
  if (!CalledFunc ||               // Can't inline external function or indirect
      CalledFunc->isDeclaration()) // call!
    return InlineResult::failure(Reason: "external or indirect");

  // The inliner does not know how to inline through calls with operand bundles
  // in general ...
  if (CB.hasOperandBundles()) {
    for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
      auto OBUse = CB.getOperandBundleAt(Index: i);
      uint32_t Tag = OBUse.getTagID();
      // ... but it knows how to inline through "deopt" operand bundles ...
      if (Tag == LLVMContext::OB_deopt)
        continue;
      // ... and "funclet" operand bundles.
      if (Tag == LLVMContext::OB_funclet)
        continue;
      if (Tag == LLVMContext::OB_clang_arc_attachedcall)
        continue;
      if (Tag == LLVMContext::OB_kcfi)
        continue;
      if (Tag == LLVMContext::OB_convergencectrl) {
        // Remember the token so InlineFunctionImpl can rewrite the callee's
        // convergence entry intrinsic to use it.
        IFI.ConvergenceControlToken = OBUse.Inputs[0].get();
        continue;
      }

      // Any other bundle kind is unknown to the inliner: bail out.
      return InlineResult::failure(Reason: "unsupported operand bundle");
    }
  }

  // FIXME: The check below is redundant and incomplete. According to spec, if a
  // convergent call is missing a token, then the caller is using uncontrolled
  // convergence. If the callee has an entry intrinsic, then the callee is using
  // controlled convergence, and the call cannot be inlined. A proper
  // implementation of this check requires a whole new analysis that identifies
  // convergence in every function. For now, we skip that and just do this one
  // cursory check. The underlying assumption is that in a compiler flow that
  // fully implements convergence control tokens, there is no mixing of
  // controlled and uncontrolled convergent operations in the whole program.
  if (CB.isConvergent()) {
    if (!IFI.ConvergenceControlToken &&
        getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) {
      return InlineResult::failure(
          Reason: "convergent call needs convergencectrl operand");
    }
  }

  const BasicBlock *OrigBB = CB.getParent();
  const Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (Caller->hasGC() && CalledFunc->getGC() != Caller->getGC())
      return InlineResult::failure(Reason: "incompatible GC");
  }

  // Get the personality function from the callee if it contains a landing pad.
  Constant *CalledPersonality =
      CalledFunc->hasPersonalityFn()
          ? CalledFunc->getPersonalityFn()->stripPointerCasts()
          : nullptr;

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  Constant *CallerPersonality =
      Caller->hasPersonalityFn()
          ? Caller->getPersonalityFn()->stripPointerCasts()
          : nullptr;
  if (CalledPersonality) {
    // If the personality functions match, then we can perform the
    // inlining. Otherwise, we can't inline.
    // TODO: This isn't 100% true. Some personality functions are proper
    //       supersets of others and can be used in place of the other.
    if (CallerPersonality && CalledPersonality != CallerPersonality)
      return InlineResult::failure(Reason: "incompatible personality");
  }

  // We need to figure out which funclet the callsite was in so that we may
  // properly nest the callee.
  if (CallerPersonality) {
    EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
    if (isScopedEHPersonality(Pers: Personality)) {
      std::optional<OperandBundleUse> ParentFunclet =
          CB.getOperandBundle(ID: LLVMContext::OB_funclet);
      if (ParentFunclet)
        IFI.CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());

      // OK, the inlining site is legal. What about the target function?

      if (IFI.CallSiteEHPad) {
        if (Personality == EHPersonality::MSVC_CXX) {
          // The MSVC personality cannot tolerate catches getting inlined into
          // cleanup funclets.
          if (isa<CleanupPadInst>(Val: IFI.CallSiteEHPad)) {
            // Ok, the call site is within a cleanuppad.  Let's check the callee
            // for catchpads.
            for (const BasicBlock &CalledBB : *CalledFunc) {
              if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt()))
                return InlineResult::failure(Reason: "catch in cleanup funclet");
            }
          }
        } else if (isAsynchronousEHPersonality(Pers: Personality)) {
          // SEH is even less tolerant, there may not be any sort of exceptional
          // funclet in the callee.
          for (const BasicBlock &CalledBB : *CalledFunc) {
            if (CalledBB.isEHPad())
              return InlineResult::failure(Reason: "SEH in cleanup funclet");
          }
        }
      }
    }
  }

  return InlineResult::success();
}
2639
2640/// This function inlines the called function into the basic block of the
2641/// caller. This returns false if it is not possible to inline this call.
2642/// The program is still in a well defined state if this occurs though.
2643///
2644/// Note that this only does one level of inlining. For example, if the
2645/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2646/// exists in the instruction stream. Similarly this will inline a recursive
2647/// function by one level.
2648void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
2649 bool MergeAttributes, AAResults *CalleeAAR,
2650 bool InsertLifetime, Function *ForwardVarArgsTo,
2651 OptimizationRemarkEmitter *ORE) {
2652 BasicBlock *OrigBB = CB.getParent();
2653 Function *Caller = OrigBB->getParent();
2654 Function *CalledFunc = CB.getCalledFunction();
2655 assert(CalledFunc && !CalledFunc->isDeclaration() &&
2656 "CanInlineCallSite should have verified direct call to definition");
2657
2658 // Determine if we are dealing with a call in an EHPad which does not unwind
2659 // to caller.
2660 bool EHPadForCallUnwindsLocally = false;
2661 if (IFI.CallSiteEHPad && isa<CallInst>(Val: CB)) {
2662 UnwindDestMemoTy FuncletUnwindMap;
2663 Value *CallSiteUnwindDestToken =
2664 getUnwindDestToken(EHPad: IFI.CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2665
2666 EHPadForCallUnwindsLocally =
2667 CallSiteUnwindDestToken &&
2668 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2669 }
2670
2671 // Get an iterator to the last basic block in the function, which will have
2672 // the new function inlined after it.
2673 Function::iterator LastBlock = --Caller->end();
2674
2675 // Make sure to capture all of the return instructions from the cloned
2676 // function.
2677 SmallVector<ReturnInst*, 8> Returns;
2678 ClonedCodeInfo InlinedFunctionInfo;
2679 Function::iterator FirstNewBlock;
2680
2681 // GC poses two hazards to inlining, which only occur when the callee has GC:
2682 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2683 // caller.
2684 // 2. If the caller has a differing GC, it is invalid to inline.
2685 if (CalledFunc->hasGC()) {
2686 if (!Caller->hasGC())
2687 Caller->setGC(CalledFunc->getGC());
2688 else {
2689 assert(CalledFunc->getGC() == Caller->getGC() &&
2690 "CanInlineCallSite should have verified compatible GCs");
2691 }
2692 }
2693
2694 if (CalledFunc->hasPersonalityFn()) {
2695 Constant *CalledPersonality =
2696 CalledFunc->getPersonalityFn()->stripPointerCasts();
2697 if (!Caller->hasPersonalityFn()) {
2698 Caller->setPersonalityFn(CalledPersonality);
2699 } else
2700 assert(Caller->getPersonalityFn()->stripPointerCasts() ==
2701 CalledPersonality &&
2702 "CanInlineCallSite should have verified compatible personality");
2703 }
2704
2705 { // Scope to destroy VMap after cloning.
2706 ValueToValueMapTy VMap;
2707 struct ByValInit {
2708 Value *Dst;
2709 Value *Src;
2710 MaybeAlign SrcAlign;
2711 Type *Ty;
2712 };
2713 // Keep a list of tuples (dst, src, src_align) to emit byval
2714 // initializations. Src Alignment is only available though the callbase,
2715 // therefore has to be saved.
2716 SmallVector<ByValInit, 4> ByValInits;
2717
2718 // When inlining a function that contains noalias scope metadata,
2719 // this metadata needs to be cloned so that the inlined blocks
2720 // have different "unique scopes" at every call site.
2721 // Track the metadata that must be cloned. Do this before other changes to
2722 // the function, so that we do not get in trouble when inlining caller ==
2723 // callee.
2724 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2725
2726 auto &DL = Caller->getDataLayout();
2727
2728 // Calculate the vector of arguments to pass into the function cloner, which
2729 // matches up the formal to the actual argument values.
2730 auto AI = CB.arg_begin();
2731 unsigned ArgNo = 0;
2732 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2733 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2734 Value *ActualArg = *AI;
2735
2736 // When byval arguments actually inlined, we need to make the copy implied
2737 // by them explicit. However, we don't do this if the callee is readonly
2738 // or readnone, because the copy would be unneeded: the callee doesn't
2739 // modify the struct.
2740 if (CB.isByValArgument(ArgNo)) {
2741 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2742 TheCall: &CB, CalledFunc, IFI,
2743 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2744 if (ActualArg != *AI)
2745 ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI,
2746 .SrcAlign: CB.getParamAlign(ArgNo),
2747 .Ty: CB.getParamByValType(ArgNo)});
2748 }
2749
2750 VMap[&*I] = ActualArg;
2751 }
2752
2753 // TODO: Remove this when users have been updated to the assume bundles.
2754 // Add alignment assumptions if necessary. We do this before the inlined
2755 // instructions are actually cloned into the caller so that we can easily
2756 // check what will be known at the start of the inlined code.
2757 AddAlignmentAssumptions(CB, IFI);
2758
2759 AssumptionCache *AC =
2760 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2761
2762 /// Preserve all attributes on of the call and its parameters.
2763 salvageKnowledge(I: &CB, AC);
2764
2765 // We want the inliner to prune the code as it copies. We would LOVE to
2766 // have no dead or constant instructions leftover after inlining occurs
2767 // (which can happen, e.g., because an argument was constant), but we'll be
2768 // happy with whatever the cloner can do.
2769 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2770 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2771 CodeInfo: &InlinedFunctionInfo);
2772 // Remember the first block that is newly cloned over.
2773 FirstNewBlock = LastBlock; ++FirstNewBlock;
2774
2775 // Insert retainRV/clainRV runtime calls.
2776 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2777 if (RVCallKind != objcarc::ARCInstKind::None)
2778 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2779
2780 // Updated caller/callee profiles only when requested. For sample loader
2781 // inlining, the context-sensitive inlinee profile doesn't need to be
2782 // subtracted from callee profile, and the inlined clone also doesn't need
2783 // to be scaled based on call site count.
2784 if (IFI.UpdateProfile) {
2785 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2786 // Update the BFI of blocks cloned into the caller.
2787 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2788 CalleeEntryBlock: CalledFunc->front());
2789
2790 if (auto Profile = CalledFunc->getEntryCount())
2791 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2792 CallerBFI: IFI.CallerBFI);
2793 }
2794
2795 // Inject byval arguments initialization.
2796 for (ByValInit &Init : ByValInits)
2797 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign,
2798 M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI,
2799 CalledFunc);
2800
2801 std::optional<OperandBundleUse> ParentDeopt =
2802 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2803 if (ParentDeopt) {
2804 SmallVector<OperandBundleDef, 2> OpDefs;
2805
2806 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2807 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2808 if (!ICS)
2809 continue; // instruction was DCE'd or RAUW'ed to undef
2810
2811 OpDefs.clear();
2812
2813 OpDefs.reserve(N: ICS->getNumOperandBundles());
2814
2815 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2816 ++COBi) {
2817 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2818 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2819 // If the inlined call has other operand bundles, let them be
2820 OpDefs.emplace_back(Args&: ChildOB);
2821 continue;
2822 }
2823
2824 // It may be useful to separate this logic (of handling operand
2825 // bundles) out to a separate "policy" component if this gets crowded.
2826 // Prepend the parent's deoptimization continuation to the newly
2827 // inlined call's deoptimization continuation.
2828 std::vector<Value *> MergedDeoptArgs;
2829 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2830 ChildOB.Inputs.size());
2831
2832 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2833 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2834
2835 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2836 }
2837
2838 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2839
2840 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2841 // this even if the call returns void.
2842 ICS->replaceAllUsesWith(V: NewI);
2843
2844 VH = nullptr;
2845 ICS->eraseFromParent();
2846 }
2847 }
2848
2849 // For 'nodebug' functions, the associated DISubprogram is always null.
2850 // Conservatively avoid propagating the callsite debug location to
2851 // instructions inlined from a function whose DISubprogram is not null.
2852 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2853 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2854
2855 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2856 // Interpret inlined stores to caller-local variables as assignments.
2857 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2858
2859 // Update DIAssignID metadata attachments and uses so that they are
2860 // unique to this inlined instance.
2861 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2862 }
2863
2864 // Now clone the inlined noalias scope metadata.
2865 SAMetadataCloner.clone();
2866 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2867
2868 // Add noalias metadata if necessary.
2869 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2870
2871 // Clone return attributes on the callsite into the calls within the inlined
2872 // function which feed into its return value.
2873 AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2874
2875 // Clone attributes on the params of the callsite to calls within the
2876 // inlined function which use the same param.
2877 AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2878
2879 propagateMemProfMetadata(
2880 Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
2881
2882 // Propagate metadata on the callsite if necessary.
2883 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2884
2885 // Propagate implicit ref metadata.
2886 if (CalledFunc->hasMetadata(KindID: LLVMContext::MD_implicit_ref)) {
2887 SmallVector<MDNode *> MDs;
2888 CalledFunc->getMetadata(KindID: LLVMContext::MD_implicit_ref, MDs);
2889 for (MDNode *MD : MDs) {
2890 Caller->addMetadata(KindID: LLVMContext::MD_implicit_ref, MD&: *MD);
2891 }
2892 }
2893
2894 // Propagate inlined.from metadata for dontcall diagnostics.
2895 PropagateInlinedFromMetadata(CB, CalledFuncName: CalledFunc->getName(), CallerFuncName: Caller->getName(),
2896 FStart: FirstNewBlock, FEnd: Caller->end());
2897
2898 // Register any cloned assumptions.
2899 if (IFI.GetAssumptionCache)
2900 for (BasicBlock &NewBlock :
2901 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2902 for (Instruction &I : NewBlock)
2903 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2904 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2905 }
2906
2907 if (IFI.ConvergenceControlToken) {
2908 IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock);
2909 if (IntrinsicCall) {
2910 IntrinsicCall->replaceAllUsesWith(V: IFI.ConvergenceControlToken);
2911 IntrinsicCall->eraseFromParent();
2912 }
2913 }
2914
2915 // If there are any alloca instructions in the block that used to be the entry
2916 // block for the callee, move them to the entry block of the caller. First
2917 // calculate which instruction they should be inserted before. We insert the
2918 // instructions at the end of the current alloca list.
2919 {
2920 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2921 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2922 E = FirstNewBlock->end(); I != E; ) {
2923 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
2924 if (!AI) continue;
2925
2926 // If the alloca is now dead, remove it. This often occurs due to code
2927 // specialization.
2928 if (AI->use_empty()) {
2929 AI->eraseFromParent();
2930 continue;
2931 }
2932
2933 if (!allocaWouldBeStaticInEntry(AI))
2934 continue;
2935
2936 // Keep track of the static allocas that we inline into the caller.
2937 IFI.StaticAllocas.push_back(Elt: AI);
2938
2939 // Scan for the block of allocas that we can move over, and move them
2940 // all at once.
2941 while (isa<AllocaInst>(Val: I) &&
2942 !cast<AllocaInst>(Val&: I)->use_empty() &&
2943 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
2944 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
2945 ++I;
2946 }
2947
2948 // Transfer all of the allocas over in a block. Using splice means
2949 // that the instructions aren't removed from the symbol table, then
2950 // reinserted.
2951 I.setTailBit(true);
2952 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
2953 FromBeginIt: AI->getIterator(), FromEndIt: I);
2954 }
2955 }
2956
2957 // If the call to the callee cannot throw, set the 'nounwind' flag on any
2958 // calls that we inline.
2959 bool MarkNoUnwind = CB.doesNotThrow();
2960
2961 SmallVector<Value*,4> VarArgsToForward;
2962 SmallVector<AttributeSet, 4> VarArgsAttrs;
2963 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2964 i < CB.arg_size(); i++) {
2965 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
2966 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
2967 }
2968
2969 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2970 if (InlinedFunctionInfo.ContainsCalls) {
2971 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2972 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
2973 CallSiteTailKind = CI->getTailCallKind();
2974
2975 // For inlining purposes, the "notail" marker is the same as no marker.
2976 if (CallSiteTailKind == CallInst::TCK_NoTail)
2977 CallSiteTailKind = CallInst::TCK_None;
2978
2979 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2980 ++BB) {
2981 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
2982 CallInst *CI = dyn_cast<CallInst>(Val: &I);
2983 if (!CI)
2984 continue;
2985
2986 // Forward varargs from inlined call site to calls to the
2987 // ForwardVarArgsTo function, if requested, and to musttail calls.
2988 if (!VarArgsToForward.empty() &&
2989 ((ForwardVarArgsTo &&
2990 CI->getCalledFunction() == ForwardVarArgsTo) ||
2991 CI->isMustTailCall())) {
2992 // Collect attributes for non-vararg parameters.
2993 AttributeList Attrs = CI->getAttributes();
2994 SmallVector<AttributeSet, 8> ArgAttrs;
2995 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2996 for (unsigned ArgNo = 0;
2997 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2998 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
2999 }
3000
3001 // Add VarArg attributes.
3002 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
3003 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
3004 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
3005 // Add VarArgs to existing parameters.
3006 SmallVector<Value *, 6> Params(CI->args());
3007 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
3008 CallInst *NewCI = CallInst::Create(
3009 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
3010 NewCI->setDebugLoc(CI->getDebugLoc());
3011 NewCI->setAttributes(Attrs);
3012 NewCI->setCallingConv(CI->getCallingConv());
3013 CI->replaceAllUsesWith(V: NewCI);
3014 CI->eraseFromParent();
3015 CI = NewCI;
3016 }
3017
3018 if (Function *F = CI->getCalledFunction())
3019 InlinedDeoptimizeCalls |=
3020 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
3021
3022 // We need to reduce the strength of any inlined tail calls. For
3023 // musttail, we have to avoid introducing potential unbounded stack
3024 // growth. For example, if functions 'f' and 'g' are mutually recursive
3025 // with musttail, we can inline 'g' into 'f' so long as we preserve
3026 // musttail on the cloned call to 'f'. If either the inlined call site
3027 // or the cloned call site is *not* musttail, the program already has
3028 // one frame of stack growth, so it's safe to remove musttail. Here is
3029 // a table of example transformations:
3030 //
3031 // f -> musttail g -> musttail f ==> f -> musttail f
3032 // f -> musttail g -> tail f ==> f -> tail f
3033 // f -> g -> musttail f ==> f -> f
3034 // f -> g -> tail f ==> f -> f
3035 //
3036 // Inlined notail calls should remain notail calls.
3037 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
3038 if (ChildTCK != CallInst::TCK_NoTail)
3039 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
3040 CI->setTailCallKind(ChildTCK);
3041 InlinedMustTailCalls |= CI->isMustTailCall();
3042
3043 // Call sites inlined through a 'nounwind' call site should be
3044 // 'nounwind' as well. However, avoid marking call sites explicitly
3045 // where possible. This helps expose more opportunities for CSE after
3046 // inlining, commonly when the callee is an intrinsic.
3047 if (MarkNoUnwind && !CI->doesNotThrow())
3048 CI->setDoesNotThrow();
3049 }
3050 }
3051 }
3052
3053 // Leave lifetime markers for the static alloca's, scoping them to the
3054 // function we just inlined.
3055 // We need to insert lifetime intrinsics even at O0 to avoid invalid
3056 // access caused by multithreaded coroutines. The check
3057 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
3058 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
3059 !IFI.StaticAllocas.empty()) {
3060 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
3061 for (AllocaInst *AI : IFI.StaticAllocas) {
3062 // Don't mark swifterror allocas. They can't have bitcast uses.
3063 if (AI->isSwiftError())
3064 continue;
3065
3066 // If the alloca is already scoped to something smaller than the whole
3067 // function then there's no need to add redundant, less accurate markers.
3068 if (hasLifetimeMarkers(AI))
3069 continue;
3070
3071 std::optional<TypeSize> Size = AI->getAllocationSize(DL: AI->getDataLayout());
3072 if (Size && Size->isZero())
3073 continue;
3074
3075 builder.CreateLifetimeStart(Ptr: AI);
3076 for (ReturnInst *RI : Returns) {
3077 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
3078 // call and a return. The return kills all local allocas.
3079 if (InlinedMustTailCalls &&
3080 RI->getParent()->getTerminatingMustTailCall())
3081 continue;
3082 if (InlinedDeoptimizeCalls &&
3083 RI->getParent()->getTerminatingDeoptimizeCall())
3084 continue;
3085 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI);
3086 }
3087 }
3088 }
3089
3090 // If the inlined code contained dynamic alloca instructions, wrap the inlined
3091 // code with llvm.stacksave/llvm.stackrestore intrinsics.
3092 if (InlinedFunctionInfo.ContainsDynamicAllocas) {
3093 // Insert the llvm.stacksave.
3094 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
3095 .CreateStackSave(Name: "savedstack");
3096
3097 // Insert a call to llvm.stackrestore before any return instructions in the
3098 // inlined function.
3099 for (ReturnInst *RI : Returns) {
3100 // Don't insert llvm.stackrestore calls between a musttail or deoptimize
3101 // call and a return. The return will restore the stack pointer.
3102 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
3103 continue;
3104 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
3105 continue;
3106 IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr);
3107 }
3108 }
3109
3110 // If we are inlining for an invoke instruction, we must make sure to rewrite
3111 // any call instructions into invoke instructions. This is sensitive to which
3112 // funclet pads were top-level in the inlinee, so must be done before
3113 // rewriting the "parent pad" links.
3114 if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) {
3115 BasicBlock *UnwindDest = II->getUnwindDest();
3116 BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
3117 if (isa<LandingPadInst>(Val: FirstNonPHI)) {
3118 HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3119 } else {
3120 HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3121 }
3122 }
3123
3124 // Update the lexical scopes of the new funclets and callsites.
3125 // Anything that had 'none' as its parent is now nested inside the callsite's
3126 // EHPad.
3127 if (IFI.CallSiteEHPad) {
3128 for (Function::iterator BB = FirstNewBlock->getIterator(),
3129 E = Caller->end();
3130 BB != E; ++BB) {
3131 // Add bundle operands to inlined call sites.
3132 PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad: IFI.CallSiteEHPad);
3133
3134 // It is problematic if the inlinee has a cleanupret which unwinds to
3135 // caller and we inline it into a call site which doesn't unwind but into
3136 // an EH pad that does. Such an edge must be dynamically unreachable.
3137 // As such, we replace the cleanupret with unreachable.
3138 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator()))
3139 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
3140 changeToUnreachable(I: CleanupRet);
3141
3142 BasicBlock::iterator I = BB->getFirstNonPHIIt();
3143 if (!I->isEHPad())
3144 continue;
3145
3146 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
3147 if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad()))
3148 CatchSwitch->setParentPad(IFI.CallSiteEHPad);
3149 } else {
3150 auto *FPI = cast<FuncletPadInst>(Val&: I);
3151 if (isa<ConstantTokenNone>(Val: FPI->getParentPad()))
3152 FPI->setParentPad(IFI.CallSiteEHPad);
3153 }
3154 }
3155 }
3156
3157 if (InlinedDeoptimizeCalls) {
3158 // We need to at least remove the deoptimizing returns from the Return set,
3159 // so that the control flow from those returns does not get merged into the
3160 // caller (but terminate it instead). If the caller's return type does not
3161 // match the callee's return type, we also need to change the return type of
3162 // the intrinsic.
3163 if (Caller->getReturnType() == CB.getType()) {
3164 llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) {
3165 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
3166 });
3167 } else {
3168 SmallVector<ReturnInst *, 8> NormalReturns;
3169 Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
3170 M: Caller->getParent(), id: Intrinsic::experimental_deoptimize,
3171 OverloadTys: {Caller->getReturnType()});
3172
3173 for (ReturnInst *RI : Returns) {
3174 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
3175 if (!DeoptCall) {
3176 NormalReturns.push_back(Elt: RI);
3177 continue;
3178 }
3179
3180 // The calling convention on the deoptimize call itself may be bogus,
3181 // since the code we're inlining may have undefined behavior (and may
3182 // never actually execute at runtime); but all
3183 // @llvm.experimental.deoptimize declarations have to have the same
3184 // calling convention in a well-formed module.
3185 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
3186 NewDeoptIntrinsic->setCallingConv(CallingConv);
3187 auto *CurBB = RI->getParent();
3188 RI->eraseFromParent();
3189
3190 SmallVector<Value *, 4> CallArgs(DeoptCall->args());
3191
3192 SmallVector<OperandBundleDef, 1> OpBundles;
3193 DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles);
3194 auto DeoptAttributes = DeoptCall->getAttributes();
3195 DeoptCall->eraseFromParent();
3196 assert(!OpBundles.empty() &&
3197 "Expected at least the deopt operand bundle");
3198
3199 IRBuilder<> Builder(CurBB);
3200 CallInst *NewDeoptCall =
3201 Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles);
3202 NewDeoptCall->setCallingConv(CallingConv);
3203 NewDeoptCall->setAttributes(DeoptAttributes);
3204 if (NewDeoptCall->getType()->isVoidTy())
3205 Builder.CreateRetVoid();
3206 else
3207 Builder.CreateRet(V: NewDeoptCall);
3208 // Since the ret type is changed, remove the incompatible attributes.
3209 NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible(
3210 Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes()));
3211 }
3212
3213 // Leave behind the normal returns so we can merge control flow.
3214 std::swap(LHS&: Returns, RHS&: NormalReturns);
3215 }
3216 }
3217
3218 // Handle any inlined musttail call sites. In order for a new call site to be
3219 // musttail, the source of the clone and the inlined call site must have been
3220 // musttail. Therefore it's safe to return without merging control into the
3221 // phi below.
3222 if (InlinedMustTailCalls) {
3223 // Check if we need to bitcast the result of any musttail calls.
3224 Type *NewRetTy = Caller->getReturnType();
3225 bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
3226
3227 // Handle the returns preceded by musttail calls separately.
3228 SmallVector<ReturnInst *, 8> NormalReturns;
3229 for (ReturnInst *RI : Returns) {
3230 CallInst *ReturnedMustTail =
3231 RI->getParent()->getTerminatingMustTailCall();
3232 if (!ReturnedMustTail) {
3233 NormalReturns.push_back(Elt: RI);
3234 continue;
3235 }
3236 if (!NeedBitCast)
3237 continue;
3238
3239 // Delete the old return and any preceding bitcast.
3240 BasicBlock *CurBB = RI->getParent();
3241 auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue());
3242 RI->eraseFromParent();
3243 if (OldCast)
3244 OldCast->eraseFromParent();
3245
3246 // Insert a new bitcast and return with the right type.
3247 IRBuilder<> Builder(CurBB);
3248 Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy));
3249 }
3250
3251 // Leave behind the normal returns so we can merge control flow.
3252 std::swap(LHS&: Returns, RHS&: NormalReturns);
3253 }
3254
3255 // Now that all of the transforms on the inlined code have taken place but
3256 // before we splice the inlined code into the CFG and lose track of which
3257 // blocks were actually inlined, collect the call sites. We only do this if
3258 // call graph updates weren't requested, as those provide value handle based
3259 // tracking of inlined call sites instead. Calls to intrinsics are not
3260 // collected because they are not inlineable.
3261 if (InlinedFunctionInfo.ContainsCalls) {
3262 // Otherwise just collect the raw call sites that were inlined.
3263 for (BasicBlock &NewBB :
3264 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
3265 for (Instruction &I : NewBB)
3266 if (auto *CB = dyn_cast<CallBase>(Val: &I))
3267 if (!(CB->getCalledFunction() &&
3268 CB->getCalledFunction()->isIntrinsic()))
3269 IFI.InlinedCallSites.push_back(Elt: CB);
3270 }
3271
3272 // If we cloned in _exactly one_ basic block, and if that block ends in a
3273 // return instruction, we splice the body of the inlined callee directly into
3274 // the calling basic block.
3275 if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) {
3276 // Move all of the instructions right before the call.
3277 OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(),
3278 FromEndIt: FirstNewBlock->end());
3279 // Remove the cloned basic block.
3280 Caller->back().eraseFromParent();
3281
3282 // If the call site was an invoke instruction, add a branch to the normal
3283 // destination.
3284 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3285 UncondBrInst *NewBr =
3286 UncondBrInst::Create(Target: II->getNormalDest(), InsertBefore: CB.getIterator());
3287 NewBr->setDebugLoc(Returns[0]->getDebugLoc());
3288 }
3289
3290 // If the return instruction returned a value, replace uses of the call with
3291 // uses of the returned value.
3292 if (!CB.use_empty()) {
3293 ReturnInst *R = Returns[0];
3294 if (&CB == R->getReturnValue())
3295 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3296 else
3297 CB.replaceAllUsesWith(V: R->getReturnValue());
3298 }
3299 // Since we are now done with the Call/Invoke, we can delete it.
3300 CB.eraseFromParent();
3301
3302 // Since we are now done with the return instruction, delete it also.
3303 Returns[0]->eraseFromParent();
3304
3305 if (MergeAttributes)
3306 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3307
3308 // We are now done with the inlining.
3309 return;
3310 }
3311
3312 // Otherwise, we have the normal case, of more than one block to inline or
3313 // multiple return sites.
3314
3315 // We want to clone the entire callee function into the hole between the
3316 // "starter" and "ender" blocks. How we accomplish this depends on whether
3317 // this is an invoke instruction or a call instruction.
3318 BasicBlock *AfterCallBB;
3319 UncondBrInst *CreatedBranchToNormalDest = nullptr;
3320 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3321
3322 // Add an unconditional branch to make this look like the CallInst case...
3323 CreatedBranchToNormalDest =
3324 UncondBrInst::Create(Target: II->getNormalDest(), InsertBefore: CB.getIterator());
3325 // We intend to replace this DebugLoc with another later.
3326 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary());
3327
3328 // Split the basic block. This guarantees that no PHI nodes will have to be
3329 // updated due to new incoming edges, and make the invoke case more
3330 // symmetric to the call case.
3331 AfterCallBB =
3332 OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(),
3333 BBName: CalledFunc->getName() + ".exit");
3334
3335 } else { // It's a call
3336 // If this is a call instruction, we need to split the basic block that
3337 // the call lives in.
3338 //
3339 AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(),
3340 BBName: CalledFunc->getName() + ".exit");
3341 }
3342
3343 if (IFI.CallerBFI) {
3344 // Copy original BB's block frequency to AfterCallBB
3345 IFI.CallerBFI->setBlockFreq(BB: AfterCallBB,
3346 Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB));
3347 }
3348
3349 // Change the branch that used to go to AfterCallBB to branch to the first
3350 // basic block of the inlined function.
3351 //
3352 UncondBrInst *Br = cast<UncondBrInst>(Val: OrigBB->getTerminator());
3353 Br->setSuccessor(&*FirstNewBlock);
3354
3355 // Now that the function is correct, make it a little bit nicer. In
3356 // particular, move the basic blocks inserted from the end of the function
3357 // into the space made by splitting the source basic block.
3358 Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock,
3359 FromEndIt: Caller->end());
3360
3361 // Handle all of the return instructions that we just cloned in, and eliminate
3362 // any users of the original call/invoke instruction.
3363 Type *RTy = CalledFunc->getReturnType();
3364
3365 PHINode *PHI = nullptr;
3366 if (Returns.size() > 1) {
3367 // The PHI node should go at the front of the new basic block to merge all
3368 // possible incoming values.
3369 if (!CB.use_empty()) {
3370 PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName());
3371 PHI->insertBefore(InsertPos: AfterCallBB->begin());
3372 // Anything that used the result of the function call should now use the
3373 // PHI node as their operand.
3374 CB.replaceAllUsesWith(V: PHI);
3375 }
3376
3377 // Loop over all of the return instructions adding entries to the PHI node
3378 // as appropriate.
3379 if (PHI) {
3380 for (ReturnInst *RI : Returns) {
3381 assert(RI->getReturnValue()->getType() == PHI->getType() &&
3382 "Ret value not consistent in function!");
3383 PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent());
3384 }
3385 }
3386
3387 // Add a branch to the merge points and remove return instructions.
3388 DebugLoc Loc;
3389 for (ReturnInst *RI : Returns) {
3390 UncondBrInst *BI = UncondBrInst::Create(Target: AfterCallBB, InsertBefore: RI->getIterator());
3391 Loc = RI->getDebugLoc();
3392 BI->setDebugLoc(Loc);
3393 RI->eraseFromParent();
3394 }
3395 // We need to set the debug location to *somewhere* inside the
3396 // inlined function. The line number may be nonsensical, but the
3397 // instruction will at least be associated with the right
3398 // function.
3399 if (CreatedBranchToNormalDest)
3400 CreatedBranchToNormalDest->setDebugLoc(Loc);
3401 } else if (!Returns.empty()) {
3402 // Otherwise, if there is exactly one return value, just replace anything
3403 // using the return value of the call with the computed value.
3404 if (!CB.use_empty()) {
3405 if (&CB == Returns[0]->getReturnValue())
3406 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3407 else
3408 CB.replaceAllUsesWith(V: Returns[0]->getReturnValue());
3409 }
3410
3411 // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
3412 BasicBlock *ReturnBB = Returns[0]->getParent();
3413 ReturnBB->replaceAllUsesWith(V: AfterCallBB);
3414
3415 // Splice the code from the return block into the block that it will return
3416 // to, which contains the code that was after the call.
3417 AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB);
3418
3419 if (CreatedBranchToNormalDest)
3420 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
3421
3422 // Delete the return instruction now and empty ReturnBB now.
3423 Returns[0]->eraseFromParent();
3424 ReturnBB->eraseFromParent();
3425 } else if (!CB.use_empty()) {
3426 // In this case there are no returns to use, so there is no clear source
3427 // location for the "return".
3428 // FIXME: It may be correct to use the scope end line of the function here,
3429 // since this likely means we are falling out of the function.
3430 if (CreatedBranchToNormalDest)
3431 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown());
3432 // No returns, but something is using the return value of the call. Just
3433 // nuke the result.
3434 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3435 }
3436
3437 // Since we are now done with the Call/Invoke, we can delete it.
3438 CB.eraseFromParent();
3439
3440 // If we inlined any musttail calls and the original return is now
3441 // unreachable, delete it. It can only contain a bitcast and ret.
3442 if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB))
3443 AfterCallBB->eraseFromParent();
3444
3445 // We should always be able to fold the entry block of the function into the
3446 // single predecessor of the block...
3447 BasicBlock *CalleeEntry = Br->getSuccessor();
3448
3449 // Splice the code entry block into calling block, right before the
3450 // unconditional branch.
3451 CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes
3452 OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry);
3453
3454 // Remove the unconditional branch.
3455 Br->eraseFromParent();
3456
3457 // Now we can remove the CalleeEntry block, which is now empty.
3458 CalleeEntry->eraseFromParent();
3459
3460 // If we inserted a phi node, check to see if it has a single value (e.g. all
3461 // the entries are the same or undef). If so, remove the PHI so it doesn't
3462 // block other optimizations.
3463 if (PHI) {
3464 AssumptionCache *AC =
3465 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
3466 auto &DL = Caller->getDataLayout();
3467 if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) {
3468 PHI->replaceAllUsesWith(V);
3469 PHI->eraseFromParent();
3470 }
3471 }
3472
3473 if (MergeAttributes)
3474 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3475}
3476
3477llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
3478 bool MergeAttributes,
3479 AAResults *CalleeAAR,
3480 bool InsertLifetime,
3481 Function *ForwardVarArgsTo,
3482 OptimizationRemarkEmitter *ORE) {
3483 llvm::InlineResult Result = CanInlineCallSite(CB, IFI);
3484 if (Result.isSuccess()) {
3485 InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
3486 ForwardVarArgsTo, ORE);
3487 }
3488
3489 return Result;
3490}
3491
3492bool llvm::inlineHistoryIncludes(
3493 Function *F, int InlineHistoryID,
3494 ArrayRef<std::pair<Function *, int>> InlineHistory) {
3495 while (InlineHistoryID != -1) {
3496 assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
3497 "Invalid inline history ID");
3498 if (InlineHistory[InlineHistoryID].first == F)
3499 return true;
3500 InlineHistoryID = InlineHistory[InlineHistoryID].second;
3501 }
3502 return false;
3503}
3504