//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <deque>
#include <iterator>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "inline-function"

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
    EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
                            cl::Hidden,
                            cl::desc("Convert noalias attributes to metadata "
                                     "during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool> PreserveAlignmentAssumptions(
    "preserve-alignment-assumptions-during-inlining", cl::init(false),
    cl::Hidden,
    cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));

116namespace {
117
118 /// A class for recording information about inlining a landing pad.
119 class LandingPadInliningInfo {
120 /// Destination of the invoke's unwind.
121 BasicBlock *OuterResumeDest;
122
123 /// Destination for the callee's resume.
124 BasicBlock *InnerResumeDest = nullptr;
125
126 /// LandingPadInst associated with the invoke.
127 LandingPadInst *CallerLPad = nullptr;
128
129 /// PHI for EH values from landingpad insts.
130 PHINode *InnerEHValuesPHI = nullptr;
131
132 SmallVector<Value*, 8> UnwindDestPHIValues;
133
134 public:
135 LandingPadInliningInfo(InvokeInst *II)
136 : OuterResumeDest(II->getUnwindDest()) {
137 // If there are PHI nodes in the unwind destination block, we need to keep
138 // track of which values came into them from the invoke before removing
139 // the edge from this block.
140 BasicBlock *InvokeBB = II->getParent();
141 BasicBlock::iterator I = OuterResumeDest->begin();
142 for (; isa<PHINode>(Val: I); ++I) {
143 // Save the value to use for this edge.
144 PHINode *PHI = cast<PHINode>(Val&: I);
145 UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB));
146 }
147
148 CallerLPad = cast<LandingPadInst>(Val&: I);
149 }
150
151 /// The outer unwind destination is the target of
152 /// unwind edges introduced for calls within the inlined function.
153 BasicBlock *getOuterResumeDest() const {
154 return OuterResumeDest;
155 }
156
157 BasicBlock *getInnerResumeDest();
158
159 LandingPadInst *getLandingPadInst() const { return CallerLPad; }
160
161 /// Forward the 'resume' instruction to the caller's landing pad block.
162 /// When the landing pad block has only one predecessor, this is
163 /// a simple branch. When there is more than one predecessor, we need to
164 /// split the landing pad block after the landingpad instruction and jump
165 /// to there.
166 void forwardResume(ResumeInst *RI,
167 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
168
169 /// Add incoming-PHI values to the unwind destination block for the given
170 /// basic block, using the values for the original invoke's source block.
171 void addIncomingPHIValuesFor(BasicBlock *BB) const {
172 addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest);
173 }
174
175 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
176 BasicBlock::iterator I = dest->begin();
177 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
178 PHINode *phi = cast<PHINode>(Val&: I);
179 phi->addIncoming(V: UnwindDestPHIValues[i], BB: src);
180 }
181 }
182 };
183} // end anonymous namespace
184
185static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
186 BasicBlock::iterator It = BB.getFirstNonPHIIt();
187 while (It != BB.end()) {
188 if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) {
189 if (IntrinsicCall->isEntry()) {
190 return IntrinsicCall;
191 }
192 }
193 It = std::next(x: It);
194 }
195 return nullptr;
196}
197
198/// Get or create a target for the branch from ResumeInsts.
199BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
200 if (InnerResumeDest) return InnerResumeDest;
201
202 // Split the landing pad.
203 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
204 InnerResumeDest =
205 OuterResumeDest->splitBasicBlock(I: SplitPoint,
206 BBName: OuterResumeDest->getName() + ".body");
207
208 // The number of incoming edges we expect to the inner landing pad.
209 const unsigned PHICapacity = 2;
210
211 // Create corresponding new PHIs for all the PHIs in the outer landing pad.
212 BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
213 BasicBlock::iterator I = OuterResumeDest->begin();
214 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
215 PHINode *OuterPHI = cast<PHINode>(Val&: I);
216 PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity,
217 NameStr: OuterPHI->getName() + ".lpad-body");
218 InnerPHI->insertBefore(InsertPos: InsertPoint);
219 OuterPHI->replaceAllUsesWith(V: InnerPHI);
220 InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest);
221 }
222
223 // Create a PHI for the exception values.
224 InnerEHValuesPHI =
225 PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body");
226 InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint);
227 CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI);
228 InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest);
229
230 // All done.
231 return InnerResumeDest;
232}
233
234/// Forward the 'resume' instruction to the caller's landing pad block.
235/// When the landing pad block has only one predecessor, this is a simple
236/// branch. When there is more than one predecessor, we need to split the
237/// landing pad block after the landingpad instruction and jump to there.
238void LandingPadInliningInfo::forwardResume(
239 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
240 BasicBlock *Dest = getInnerResumeDest();
241 BasicBlock *Src = RI->getParent();
242
243 auto *BI = BranchInst::Create(IfTrue: Dest, InsertBefore: Src);
244 BI->setDebugLoc(RI->getDebugLoc());
245
246 // Update the PHIs in the destination. They were inserted in an order which
247 // makes this work.
248 addIncomingPHIValuesForInto(src: Src, dest: Dest);
249
250 InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src);
251 RI->eraseFromParent();
252}
253
254/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
255static Value *getParentPad(Value *EHPad) {
256 if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad))
257 return FPI->getParentPad();
258 return cast<CatchSwitchInst>(Val: EHPad)->getParentPad();
259}
260
261using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
262
263/// Helper for getUnwindDestToken that does the descendant-ward part of
264/// the search.
265static Value *getUnwindDestTokenHelper(Instruction *EHPad,
266 UnwindDestMemoTy &MemoMap) {
267 SmallVector<Instruction *, 8> Worklist(1, EHPad);
268
269 while (!Worklist.empty()) {
270 Instruction *CurrentPad = Worklist.pop_back_val();
271 // We only put pads on the worklist that aren't in the MemoMap. When
272 // we find an unwind dest for a pad we may update its ancestors, but
273 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
274 // so they should never get updated while queued on the worklist.
275 assert(!MemoMap.count(CurrentPad));
276 Value *UnwindDestToken = nullptr;
277 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) {
278 if (CatchSwitch->hasUnwindDest()) {
279 UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
280 } else {
281 // Catchswitch doesn't have a 'nounwind' variant, and one might be
282 // annotated as "unwinds to caller" when really it's nounwind (see
283 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
284 // parent's unwind dest from this. We can check its catchpads'
285 // descendants, since they might include a cleanuppad with an
286 // "unwinds to caller" cleanupret, which can be trusted.
287 for (auto HI = CatchSwitch->handler_begin(),
288 HE = CatchSwitch->handler_end();
289 HI != HE && !UnwindDestToken; ++HI) {
290 BasicBlock *HandlerBlock = *HI;
291 auto *CatchPad =
292 cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt());
293 for (User *Child : CatchPad->users()) {
294 // Intentionally ignore invokes here -- since the catchswitch is
295 // marked "unwind to caller", it would be a verifier error if it
296 // contained an invoke which unwinds out of it, so any invoke we'd
297 // encounter must unwind to some child of the catch.
298 if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child))
299 continue;
300
301 Instruction *ChildPad = cast<Instruction>(Val: Child);
302 auto Memo = MemoMap.find(Val: ChildPad);
303 if (Memo == MemoMap.end()) {
304 // Haven't figured out this child pad yet; queue it.
305 Worklist.push_back(Elt: ChildPad);
306 continue;
307 }
308 // We've already checked this child, but might have found that
309 // it offers no proof either way.
310 Value *ChildUnwindDestToken = Memo->second;
311 if (!ChildUnwindDestToken)
312 continue;
313 // We already know the child's unwind dest, which can either
314 // be ConstantTokenNone to indicate unwind to caller, or can
315 // be another child of the catchpad. Only the former indicates
316 // the unwind dest of the catchswitch.
317 if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) {
318 UnwindDestToken = ChildUnwindDestToken;
319 break;
320 }
321 assert(getParentPad(ChildUnwindDestToken) == CatchPad);
322 }
323 }
324 }
325 } else {
326 auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad);
327 for (User *U : CleanupPad->users()) {
328 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) {
329 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
330 UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
331 else
332 UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext());
333 break;
334 }
335 Value *ChildUnwindDestToken;
336 if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) {
337 ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
338 } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) {
339 Instruction *ChildPad = cast<Instruction>(Val: U);
340 auto Memo = MemoMap.find(Val: ChildPad);
341 if (Memo == MemoMap.end()) {
342 // Haven't resolved this child yet; queue it and keep searching.
343 Worklist.push_back(Elt: ChildPad);
344 continue;
345 }
346 // We've checked this child, but still need to ignore it if it
347 // had no proof either way.
348 ChildUnwindDestToken = Memo->second;
349 if (!ChildUnwindDestToken)
350 continue;
351 } else {
352 // Not a relevant user of the cleanuppad
353 continue;
354 }
355 // In a well-formed program, the child/invoke must either unwind to
356 // an(other) child of the cleanup, or exit the cleanup. In the
357 // first case, continue searching.
358 if (isa<Instruction>(Val: ChildUnwindDestToken) &&
359 getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad)
360 continue;
361 UnwindDestToken = ChildUnwindDestToken;
362 break;
363 }
364 }
365 // If we haven't found an unwind dest for CurrentPad, we may have queued its
366 // children, so move on to the next in the worklist.
367 if (!UnwindDestToken)
368 continue;
369
370 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
371 // any ancestors of CurrentPad up to but not including UnwindDestToken's
372 // parent pad. Record this in the memo map, and check to see if the
373 // original EHPad being queried is one of the ones exited.
374 Value *UnwindParent;
375 if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken))
376 UnwindParent = getParentPad(EHPad: UnwindPad);
377 else
378 UnwindParent = nullptr;
379 bool ExitedOriginalPad = false;
380 for (Instruction *ExitedPad = CurrentPad;
381 ExitedPad && ExitedPad != UnwindParent;
382 ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) {
383 // Skip over catchpads since they just follow their catchswitches.
384 if (isa<CatchPadInst>(Val: ExitedPad))
385 continue;
386 MemoMap[ExitedPad] = UnwindDestToken;
387 ExitedOriginalPad |= (ExitedPad == EHPad);
388 }
389
390 if (ExitedOriginalPad)
391 return UnwindDestToken;
392
393 // Continue the search.
394 }
395
396 // No definitive information is contained within this funclet.
397 return nullptr;
398}
399
400/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
401/// return that pad instruction. If it unwinds to caller, return
402/// ConstantTokenNone. If it does not have a definitive unwind destination,
403/// return nullptr.
404///
405/// This routine gets invoked for calls in funclets in inlinees when inlining
406/// an invoke. Since many funclets don't have calls inside them, it's queried
407/// on-demand rather than building a map of pads to unwind dests up front.
408/// Determining a funclet's unwind dest may require recursively searching its
409/// descendants, and also ancestors and cousins if the descendants don't provide
410/// an answer. Since most funclets will have their unwind dest immediately
411/// available as the unwind dest of a catchswitch or cleanupret, this routine
412/// searches top-down from the given pad and then up. To avoid worst-case
413/// quadratic run-time given that approach, it uses a memo map to avoid
414/// re-processing funclet trees. The callers that rewrite the IR as they go
415/// take advantage of this, for correctness, by checking/forcing rewritten
416/// pads' entries to match the original callee view.
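///
/// Illustrative sketch of how the callers below consult this helper (not a
/// verbatim excerpt; the surrounding control flow is elided):
///
///   UnwindDestMemoTy FuncletUnwindMap;
///   if (Value *Tok = getUnwindDestToken(FuncletPad, FuncletUnwindMap)) {
///     if (isa<ConstantTokenNone>(Tok))
///       ; // The pad definitively unwinds to the caller.
///     else
///       ; // Tok is the EH pad instruction it unwinds to.
///   } else {
///     ; // No definitive unwind destination is known.
///   }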
417static Value *getUnwindDestToken(Instruction *EHPad,
418 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
422 if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad))
423 EHPad = CPI->getCatchSwitch();
424
425 // Check if we've already determined the unwind dest for this pad.
426 auto Memo = MemoMap.find(Val: EHPad);
427 if (Memo != MemoMap.end())
428 return Memo->second;
429
430 // Search EHPad and, if necessary, its descendants.
431 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
432 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
433 if (UnwindDestToken)
434 return UnwindDestToken;
435
436 // No information is available for this EHPad from itself or any of its
437 // descendants. An unwind all the way out to a pad in the caller would
438 // need also to agree with the unwind dest of the parent funclet, so
439 // search up the chain to try to find a funclet with information. Put
440 // null entries in the memo map to avoid re-processing as we go up.
441 MemoMap[EHPad] = nullptr;
442#ifndef NDEBUG
443 SmallPtrSet<Instruction *, 4> TempMemos;
444 TempMemos.insert(EHPad);
445#endif
446 Instruction *LastUselessPad = EHPad;
447 Value *AncestorToken;
448 for (AncestorToken = getParentPad(EHPad);
449 auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken);
450 AncestorToken = getParentPad(EHPad: AncestorToken)) {
451 // Skip over catchpads since they just follow their catchswitches.
452 if (isa<CatchPadInst>(Val: AncestorPad))
453 continue;
454 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
455 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
456 // call to getUnwindDestToken, that would mean that AncestorPad had no
457 // information in itself, its descendants, or its ancestors. If that
458 // were the case, then we should also have recorded the lack of information
459 // for the descendant that we're coming from. So assert that we don't
460 // find a null entry in the MemoMap for AncestorPad.
461 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
462 auto AncestorMemo = MemoMap.find(Val: AncestorPad);
463 if (AncestorMemo == MemoMap.end()) {
464 UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap);
465 } else {
466 UnwindDestToken = AncestorMemo->second;
467 }
468 if (UnwindDestToken)
469 break;
470 LastUselessPad = AncestorPad;
471 MemoMap[LastUselessPad] = nullptr;
472#ifndef NDEBUG
473 TempMemos.insert(LastUselessPad);
474#endif
475 }
476
477 // We know that getUnwindDestTokenHelper was called on LastUselessPad and
478 // returned nullptr (and likewise for EHPad and any of its ancestors up to
479 // LastUselessPad), so LastUselessPad has no information from below. Since
480 // getUnwindDestTokenHelper must investigate all downward paths through
481 // no-information nodes to prove that a node has no information like this,
482 // and since any time it finds information it records it in the MemoMap for
483 // not just the immediately-containing funclet but also any ancestors also
484 // exited, it must be the case that, walking downward from LastUselessPad,
485 // visiting just those nodes which have not been mapped to an unwind dest
486 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
487 // they are just used to keep getUnwindDestTokenHelper from repeating work),
488 // any node visited must have been exhaustively searched with no information
489 // for it found.
490 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
491 while (!Worklist.empty()) {
492 Instruction *UselessPad = Worklist.pop_back_val();
493 auto Memo = MemoMap.find(Val: UselessPad);
494 if (Memo != MemoMap.end() && Memo->second) {
495 // Here the name 'UselessPad' is a bit of a misnomer, because we've found
496 // that it is a funclet that does have information about unwinding to
497 // a particular destination; its parent was a useless pad.
498 // Since its parent has no information, the unwind edge must not escape
499 // the parent, and must target a sibling of this pad. This local unwind
500 // gives us no information about EHPad. Leave it and the subtree rooted
501 // at it alone.
502 assert(getParentPad(Memo->second) == getParentPad(UselessPad));
503 continue;
504 }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
522 MemoMap[UselessPad] = UnwindDestToken;
523 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) {
524 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
525 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
526 auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
527 for (User *U : CatchPad->users()) {
528 assert((!isa<InvokeInst>(U) ||
529 (getParentPad(&*cast<InvokeInst>(U)
530 ->getUnwindDest()
531 ->getFirstNonPHIIt()) == CatchPad)) &&
532 "Expected useless pad");
533 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
534 Worklist.push_back(Elt: cast<Instruction>(Val: U));
535 }
536 }
537 } else {
538 assert(isa<CleanupPadInst>(UselessPad));
539 for (User *U : UselessPad->users()) {
540 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
541 assert(
542 (!isa<InvokeInst>(U) ||
543 (getParentPad(
544 &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
545 UselessPad)) &&
546 "Expected useless pad");
547 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
548 Worklist.push_back(Elt: cast<Instruction>(Val: U));
549 }
550 }
551 }
552
553 return UnwindDestToken;
554}
555
/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any such calls and, if so,
/// rewrites them to be invokes whose unwind edge jumps to UnwindEdge.
/// Returns BB if a call was rewritten (so the caller can add the new incoming
/// values to the PHI nodes of the unwind destination), and nullptr otherwise.
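///
/// For example (illustrative IR, not an excerpt), a cloned call
///
///   call void @may_throw()
///
/// is turned into an invoke whose unwind edge targets UnwindEdge:
///
///   invoke void @may_throw()
///           to label %split unwind label %UnwindEdge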
561static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
562 BasicBlock *BB, BasicBlock *UnwindEdge,
563 UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
564 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
565 // We only need to check for function calls: inlined invoke
566 // instructions require no special handling.
567 CallInst *CI = dyn_cast<CallInst>(Val: &I);
568
569 if (!CI || CI->doesNotThrow())
570 continue;
571
572 // We do not need to (and in fact, cannot) convert possibly throwing calls
573 // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
574 // invokes. The caller's "segment" of the deoptimization continuation
575 // attached to the newly inlined @llvm.experimental_deoptimize
576 // (resp. @llvm.experimental.guard) call should contain the exception
577 // handling logic, if any.
578 if (auto *F = CI->getCalledFunction())
579 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
580 F->getIntrinsicID() == Intrinsic::experimental_guard)
581 continue;
582
583 if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
591 auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]);
592 Value *UnwindDestToken =
593 getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap);
594 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
595 continue;
596#ifndef NDEBUG
597 Instruction *MemoKey;
598 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
599 MemoKey = CatchPad->getCatchSwitch();
600 else
601 MemoKey = FuncletPad;
602 assert(FuncletUnwindMap->count(MemoKey) &&
603 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
604 "must get memoized to avoid confusing later searches");
605#endif // NDEBUG
606 }
607
608 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
609 return BB;
610 }
611 return nullptr;
612}
613
614/// If we inlined an invoke site, we need to convert calls
615/// in the body of the inlined function into invokes.
616///
617/// II is the invoke instruction being inlined. FirstNewBlock is the first
618/// block of the inlined code (the last block is the end of the function),
619/// and InlineCodeInfo is information about the code that got inlined.
620static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
621 ClonedCodeInfo &InlinedCodeInfo) {
622 BasicBlock *InvokeDest = II->getUnwindDest();
623
624 Function *Caller = FirstNewBlock->getParent();
625
626 // The inlined code is currently at the end of the function, scan from the
627 // start of the inlined code to its end, checking for stuff we need to
628 // rewrite.
629 LandingPadInliningInfo Invoke(II);
630
631 // Get all of the inlined landing pad instructions.
632 SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
633 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
634 I != E; ++I)
635 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator()))
636 InlinedLPads.insert(Ptr: II->getLandingPadInst());
637
638 // Append the clauses from the outer landing pad instruction into the inlined
639 // landing pad instructions.
640 LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
641 for (LandingPadInst *InlinedLPad : InlinedLPads) {
642 unsigned OuterNum = OuterLPad->getNumClauses();
643 InlinedLPad->reserveClauses(Size: OuterNum);
644 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
645 InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx));
646 if (OuterLPad->isCleanup())
647 InlinedLPad->setCleanup(true);
648 }
649
650 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
651 BB != E; ++BB) {
652 if (InlinedCodeInfo.ContainsCalls)
653 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
654 BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest()))
655 // Update any PHI nodes in the exceptional block to indicate that there
656 // is now a new entry in them.
657 Invoke.addIncomingPHIValuesFor(BB: NewBB);
658
659 // Forward any resumes that are remaining here.
660 if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator()))
661 Invoke.forwardResume(RI, InlinedLPads);
662 }
663
664 // Now that everything is happy, we have one final detail. The PHI nodes in
665 // the exception destination block still have entries due to the original
666 // invoke instruction. Eliminate these entries (which might even delete the
667 // PHI node) now.
668 InvokeDest->removePredecessor(Pred: II->getParent());
669}
670
671/// If we inlined an invoke site, we need to convert calls
672/// in the body of the inlined function into invokes.
673///
674/// II is the invoke instruction being inlined. FirstNewBlock is the first
675/// block of the inlined code (the last block is the end of the function),
676/// and InlineCodeInfo is information about the code that got inlined.
677static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
678 ClonedCodeInfo &InlinedCodeInfo) {
679 BasicBlock *UnwindDest = II->getUnwindDest();
680 Function *Caller = FirstNewBlock->getParent();
681
682 assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");
683
684 // If there are PHI nodes in the unwind destination block, we need to keep
685 // track of which values came into them from the invoke before removing the
686 // edge from this block.
687 SmallVector<Value *, 8> UnwindDestPHIValues;
688 BasicBlock *InvokeBB = II->getParent();
689 for (PHINode &PHI : UnwindDest->phis()) {
690 // Save the value to use for this edge.
691 UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB));
692 }
693
694 // Add incoming-PHI values to the unwind destination block for the given basic
695 // block, using the values for the original invoke's source block.
696 auto UpdatePHINodes = [&](BasicBlock *Src) {
697 BasicBlock::iterator I = UnwindDest->begin();
698 for (Value *V : UnwindDestPHIValues) {
699 PHINode *PHI = cast<PHINode>(Val&: I);
700 PHI->addIncoming(V, BB: Src);
701 ++I;
702 }
703 };
704
705 // This connects all the instructions which 'unwind to caller' to the invoke
706 // destination.
707 UnwindDestMemoTy FuncletUnwindMap;
708 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
709 BB != E; ++BB) {
710 if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) {
711 if (CRI->unwindsToCaller()) {
712 auto *CleanupPad = CRI->getCleanupPad();
713 CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator());
714 CRI->eraseFromParent();
715 UpdatePHINodes(&*BB);
716 // Finding a cleanupret with an unwind destination would confuse
717 // subsequent calls to getUnwindDestToken, so map the cleanuppad
718 // to short-circuit any such calls and recognize this as an "unwind
719 // to caller" cleanup.
720 assert(!FuncletUnwindMap.count(CleanupPad) ||
721 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
722 FuncletUnwindMap[CleanupPad] =
723 ConstantTokenNone::get(Context&: Caller->getContext());
724 }
725 }
726
727 BasicBlock::iterator I = BB->getFirstNonPHIIt();
728 if (!I->isEHPad())
729 continue;
730
731 Instruction *Replacement = nullptr;
732 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
733 if (CatchSwitch->unwindsToCaller()) {
734 Value *UnwindDestToken;
735 if (auto *ParentPad =
736 dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a catchswitch,
          // leave it as "unwind to caller".
745 UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap);
746 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
747 continue;
748 } else {
749 // This catchswitch has no parent to inherit constraints from, and
750 // none of its descendants can have an unwind edge that exits it and
751 // targets another funclet in the inlinee. It may or may not have a
752 // descendant that definitively has an unwind to caller. In either
753 // case, we'll have to assume that any unwinds out of it may need to
754 // be routed to the caller, so treat it as though it has a definitive
755 // unwind to caller.
756 UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext());
757 }
758 auto *NewCatchSwitch = CatchSwitchInst::Create(
759 ParentPad: CatchSwitch->getParentPad(), UnwindDest,
760 NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(),
761 InsertBefore: CatchSwitch->getIterator());
762 for (BasicBlock *PadBB : CatchSwitch->handlers())
763 NewCatchSwitch->addHandler(Dest: PadBB);
764 // Propagate info for the old catchswitch over to the new one in
765 // the unwind map. This also serves to short-circuit any subsequent
766 // checks for the unwind dest of this catchswitch, which would get
767 // confused if they found the outer handler in the callee.
768 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
769 Replacement = NewCatchSwitch;
770 }
771 } else if (!isa<FuncletPadInst>(Val: I)) {
772 llvm_unreachable("unexpected EHPad!");
773 }
774
775 if (Replacement) {
776 Replacement->takeName(V: &*I);
777 I->replaceAllUsesWith(V: Replacement);
778 I->eraseFromParent();
779 UpdatePHINodes(&*BB);
780 }
781 }
782
783 if (InlinedCodeInfo.ContainsCalls)
784 for (Function::iterator BB = FirstNewBlock->getIterator(),
785 E = Caller->end();
786 BB != E; ++BB)
787 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
788 BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap))
789 // Update any PHI nodes in the exceptional block to indicate that there
790 // is now a new entry in them.
791 UpdatePHINodes(NewBB);
792
793 // Now that everything is happy, we have one final detail. The PHI nodes in
794 // the exception destination block still have entries due to the original
795 // invoke instruction. Eliminate these entries (which might even delete the
796 // PHI node) now.
797 UnwindDest->removePredecessor(Pred: InvokeBB);
798}
799
800static bool haveCommonPrefix(MDNode *MIBStackContext,
801 MDNode *CallsiteStackContext) {
802 assert(MIBStackContext->getNumOperands() > 0 &&
803 CallsiteStackContext->getNumOperands() > 0);
804 // Because of the context trimming performed during matching, the callsite
805 // context could have more stack ids than the MIB. We match up to the end of
806 // the shortest stack context.
807 for (auto MIBStackIter = MIBStackContext->op_begin(),
808 CallsiteStackIter = CallsiteStackContext->op_begin();
809 MIBStackIter != MIBStackContext->op_end() &&
810 CallsiteStackIter != CallsiteStackContext->op_end();
811 MIBStackIter++, CallsiteStackIter++) {
812 auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter);
813 auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter);
814 assert(Val1 && Val2);
815 if (Val1->getZExtValue() != Val2->getZExtValue())
816 return false;
817 }
818 return true;
819}
820
821static void removeMemProfMetadata(CallBase *Call) {
822 Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
823}
824
825static void removeCallsiteMetadata(CallBase *Call) {
826 Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
827}
828
829static void updateMemprofMetadata(CallBase *CI,
830 const std::vector<Metadata *> &MIBList,
831 OptimizationRemarkEmitter *ORE) {
832 assert(!MIBList.empty());
833 // Remove existing memprof, which will either be replaced or may not be needed
834 // if we are able to use a single allocation type function attribute.
835 removeMemProfMetadata(Call: CI);
836 CallStackTrie CallStack(ORE);
837 for (Metadata *MIB : MIBList)
838 CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB));
839 bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
840 assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
841 if (!MemprofMDAttached)
842 // If we used a function attribute remove the callsite metadata as well.
843 removeCallsiteMetadata(Call: CI);
844}
845
846// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
847// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
848// the call that was inlined.
849static void propagateMemProfHelper(const CallBase *OrigCall,
850 CallBase *ClonedCall,
851 MDNode *InlinedCallsiteMD,
852 OptimizationRemarkEmitter *ORE) {
853 MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite);
854 MDNode *ClonedCallsiteMD = nullptr;
855 // Check if the call originally had callsite metadata, and update it for the
856 // new call in the inlined body.
857 if (OrigCallsiteMD) {
858 // The cloned call's context is now the concatenation of the original call's
859 // callsite metadata and the callsite metadata on the call where it was
860 // inlined.
861 ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD);
862 ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD);
863 }
864
865 // Update any memprof metadata on the cloned call.
866 MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof);
867 if (!OrigMemProfMD)
868 return;
869 // We currently expect that allocations with memprof metadata also have
870 // callsite metadata for the allocation's part of the context.
871 assert(OrigCallsiteMD);
872
873 // New call's MIB list.
874 std::vector<Metadata *> NewMIBList;
875
876 // For each MIB metadata, check if its call stack context starts with the
877 // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
878 // the inlined body. If not, it stays on the out-of-line original call.
879 for (auto &MIBOp : OrigMemProfMD->operands()) {
880 MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp);
881 // Stack is first operand of MIB.
882 MDNode *StackMD = getMIBStackNode(MIB);
883 assert(StackMD);
884 // See if the new cloned callsite context matches this profiled context.
885 if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD))
886 // Add it to the cloned call's MIB list.
887 NewMIBList.push_back(x: MIB);
888 }
889 if (NewMIBList.empty()) {
890 removeMemProfMetadata(Call: ClonedCall);
891 removeCallsiteMetadata(Call: ClonedCall);
892 return;
893 }
894 if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
895 updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE);
896}
897
898// Update memprof related metadata (!memprof and !callsite) based on the
899// inlining of Callee into the callsite at CB. The updates include merging the
900// inlined callee's callsite metadata with that of the inlined call,
901// and moving the subset of any memprof contexts to the inlined callee
902// allocations if they match the new inlined call stack.
903static void
904propagateMemProfMetadata(Function *Callee, CallBase &CB,
905 bool ContainsMemProfMetadata,
906 const ValueMap<const Value *, WeakTrackingVH> &VMap,
907 OptimizationRemarkEmitter *ORE) {
908 MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite);
909 // Only need to update if the inlined callsite had callsite metadata, or if
910 // there was any memprof metadata inlined.
911 if (!CallsiteMD && !ContainsMemProfMetadata)
912 return;
913
914 // Propagate metadata onto the cloned calls in the inlined callee.
915 for (const auto &Entry : VMap) {
916 // See if this is a call that has been inlined and remapped, and not
917 // simplified away in the process.
918 auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first);
919 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second);
920 if (!OrigCall || !ClonedCall)
921 continue;
922 // If the inlined callsite did not have any callsite metadata, then it isn't
923 // involved in any profiled call contexts, and we can remove any memprof
924 // metadata on the cloned call.
925 if (!CallsiteMD) {
926 removeMemProfMetadata(Call: ClonedCall);
927 removeCallsiteMetadata(Call: ClonedCall);
928 continue;
929 }
930 propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE);
931 }
932}
933
934/// When inlining a call site that has !llvm.mem.parallel_loop_access,
935/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
936/// be propagated to all memory-accessing cloned instructions.
937static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
938 Function::iterator FEnd) {
939 MDNode *MemParallelLoopAccess =
940 CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
941 MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group);
942 MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope);
943 MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias);
944 if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
945 return;
946
947 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
948 for (Instruction &I : BB) {
949 // This metadata is only relevant for instructions that access memory.
950 if (!I.mayReadOrWriteMemory())
951 continue;
952
953 if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
955 MemParallelLoopAccess = MDNode::concatenate(
956 A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access),
957 B: MemParallelLoopAccess);
958 I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access,
959 Node: MemParallelLoopAccess);
960 }
961
962 if (AccessGroup)
963 I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups(
964 AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup));
965
966 if (AliasScope)
967 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate(
968 A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope));
969
970 if (NoAlias)
971 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate(
972 A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias));
973 }
974 }
975}
976
977/// Bundle operands of the inlined function must be added to inlined call sites.
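///
/// For example (illustrative IR), when the inlined code sits inside a caller
/// funclet %cp, a cloned call such as
///
///   call void @f()
///
/// is rewritten to carry the caller's funclet token:
///
///   call void @f() [ "funclet"(token %cp) ]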
978static void PropagateOperandBundles(Function::iterator InlinedBB,
979 Instruction *CallSiteEHPad) {
980 for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) {
981 CallBase *I = dyn_cast<CallBase>(Val: &II);
982 if (!I)
983 continue;
984 // Skip call sites which already have a "funclet" bundle.
985 if (I->getOperandBundle(ID: LLVMContext::OB_funclet))
986 continue;
987 // Skip call sites which are nounwind intrinsics (as long as they don't
988 // lower into regular function calls in the course of IR transformations).
989 auto *CalledFn =
990 dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts());
991 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
992 !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID()))
993 continue;
994
995 SmallVector<OperandBundleDef, 1> OpBundles;
996 I->getOperandBundlesAsDefs(Defs&: OpBundles);
997 OpBundles.emplace_back(Args: "funclet", Args&: CallSiteEHPad);
998
999 Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator());
1000 NewInst->takeName(V: I);
1001 I->replaceAllUsesWith(V: NewInst);
1002 I->eraseFromParent();
1003 }
1004}
1005
1006namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of
/// the metadata, putting the two versions in separate scope domains.
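///
/// Sketch of intended usage based on this interface (names other than the
/// class members are placeholders, not a verbatim excerpt):
///
///   ScopedAliasMetadataDeepCloner Cloner(CalledFunc);
///   // ... clone the callee's body into the caller ...
///   Cloner.clone();
///   Cloner.remap(FirstNewBlock, Caller->end());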
1011class ScopedAliasMetadataDeepCloner {
1012 using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
1013 SetVector<const MDNode *> MD;
1014 MetadataMap MDMap;
1015 void addRecursiveMetadataUses();
1016
1017public:
1018 ScopedAliasMetadataDeepCloner(const Function *F);
1019
1020 /// Create a new clone of the scoped alias metadata, which will be used by
1021 /// subsequent remap() calls.
1022 void clone();
1023
1024 /// Remap instructions in the given range from the original to the cloned
1025 /// metadata.
1026 void remap(Function::iterator FStart, Function::iterator FEnd);
1027};
1028} // namespace
1029
1030ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1031 const Function *F) {
1032 for (const BasicBlock &BB : *F) {
1033 for (const Instruction &I : BB) {
1034 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1035 MD.insert(X: M);
1036 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1037 MD.insert(X: M);
1038
1039 // We also need to clone the metadata in noalias intrinsics.
1040 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1041 MD.insert(X: Decl->getScopeList());
1042 }
1043 }
1044 addRecursiveMetadataUses();
1045}
1046
1047void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1048 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1049 while (!Queue.empty()) {
1050 const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val());
1051 for (const Metadata *Op : M->operands())
1052 if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op))
1053 if (MD.insert(X: OpMD))
1054 Queue.push_back(Elt: OpMD);
1055 }
1056}
1057
1058void ScopedAliasMetadataDeepCloner::clone() {
1059 assert(MDMap.empty() && "clone() already called ?");
1060
1061 SmallVector<TempMDTuple, 16> DummyNodes;
1062 for (const MDNode *I : MD) {
1063 DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {}));
1064 MDMap[I].reset(MD: DummyNodes.back().get());
1065 }
1066
1067 // Create new metadata nodes to replace the dummy nodes, replacing old
1068 // metadata references with either a dummy node or an already-created new
1069 // node.
1070 SmallVector<Metadata *, 4> NewOps;
1071 for (const MDNode *I : MD) {
1072 for (const Metadata *Op : I->operands()) {
1073 if (const MDNode *M = dyn_cast<MDNode>(Val: Op))
1074 NewOps.push_back(Elt: MDMap[M]);
1075 else
1076 NewOps.push_back(Elt: const_cast<Metadata *>(Op));
1077 }
1078
1079 MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps);
1080 MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]);
1081 assert(TempM->isTemporary() && "Expected temporary node");
1082
1083 TempM->replaceAllUsesWith(MD: NewM);
1084 NewOps.clear();
1085 }
1086}
1087
1088void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1089 Function::iterator FEnd) {
1090 if (MDMap.empty())
1091 return; // Nothing to do.
1092
1093 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1094 for (Instruction &I : BB) {
1095 // TODO: The null checks for the MDMap.lookup() results should no longer
1096 // be necessary.
1097 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1098 if (MDNode *MNew = MDMap.lookup(Val: M))
1099 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew);
1100
1101 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1102 if (MDNode *MNew = MDMap.lookup(Val: M))
1103 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew);
1104
1105 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1106 if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList()))
1107 Decl->setScopeList(MNew);
1108 }
1109 }
1110}
1111
1112/// If the inlined function has noalias arguments,
1113/// then add new alias scopes for each noalias argument, tag the mapped noalias
1114/// parameters with noalias metadata specifying the new scope, and tag all
1115/// non-derived loads, stores and memory intrinsics with the new alias scopes.
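///
/// For example (illustrative IR), for a callee argument "ptr noalias %a" a
/// fresh scope !n is created; after inlining, a cloned store through a value
/// based on %a is tagged with that scope, while a cloned store through an
/// unrelated pointer %b is marked noalias with respect to it:
///
///   store i32 0, ptr %a.i, !alias.scope !n
///   store i32 1, ptr %b.i, !noalias !n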
1116static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1117 const DataLayout &DL, AAResults *CalleeAAR,
1118 ClonedCodeInfo &InlinedFunctionInfo) {
1119 if (!EnableNoAliasConversion)
1120 return;
1121
1122 const Function *CalledFunc = CB.getCalledFunction();
1123 SmallVector<const Argument *, 4> NoAliasArgs;
1124
1125 for (const Argument &Arg : CalledFunc->args())
1126 if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty())
1127 NoAliasArgs.push_back(Elt: &Arg);
1128
1129 if (NoAliasArgs.empty())
1130 return;
1131
1132 // To do a good job, if a noalias variable is captured, we need to know if
1133 // the capture point dominates the particular use we're considering.
1134 DominatorTree DT;
1135 DT.recalculate(Func&: const_cast<Function&>(*CalledFunc));
1136
1137 // noalias indicates that pointer values based on the argument do not alias
1138 // pointer values which are not based on it. So we add a new "scope" for each
1139 // noalias function argument. Accesses using pointers based on that argument
1140 // become part of that alias scope, accesses using pointers not based on that
1141 // argument are tagged as noalias with that scope.
1142
1143 DenseMap<const Argument *, MDNode *> NewScopes;
1144 MDBuilder MDB(CalledFunc->getContext());
1145
1146 // Create a new scope domain for this function.
1147 MDNode *NewDomain =
1148 MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName());
1149 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
1150 const Argument *A = NoAliasArgs[i];
1151
1152 std::string Name = std::string(CalledFunc->getName());
1153 if (A->hasName()) {
1154 Name += ": %";
1155 Name += A->getName();
1156 } else {
1157 Name += ": argument ";
1158 Name += utostr(X: i);
1159 }
1160
1161 // Note: We always create a new anonymous root here. This is true regardless
1162 // of the linkage of the callee because the aliasing "scope" is not just a
1163 // property of the callee, but also all control dependencies in the caller.
1164 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
1165 NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope));
1166
1167 if (UseNoAliasIntrinsic) {
1168 // Introduce a llvm.experimental.noalias.scope.decl for the noalias
1169 // argument.
1170 MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope);
1171 auto *NoAliasDecl =
1172 IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList);
1173 // Ignore the result for now. The result will be used when the
1174 // llvm.noalias intrinsic is introduced.
1175 (void)NoAliasDecl;
1176 }
1177 }
1178
1179 // Iterate over all new instructions in the map; for all memory-access
1180 // instructions, add the alias scope metadata.
1181 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1182 VMI != VMIE; ++VMI) {
1183 if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) {
1184 if (!VMI->second)
1185 continue;
1186
1187 Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second);
1188 if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI))
1189 continue;
1190
1191 bool IsArgMemOnlyCall = false, IsFuncCall = false;
1192 SmallVector<const Value *, 2> PtrArgs;
1193
1194 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
1195 PtrArgs.push_back(Elt: LI->getPointerOperand());
1196 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1197 PtrArgs.push_back(Elt: SI->getPointerOperand());
1198 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I))
1199 PtrArgs.push_back(Elt: VAAI->getPointerOperand());
1200 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I))
1201 PtrArgs.push_back(Elt: CXI->getPointerOperand());
1202 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I))
1203 PtrArgs.push_back(Elt: RMWI->getPointerOperand());
1204 else if (const auto *Call = dyn_cast<CallBase>(Val: I)) {
1205 // If we know that the call does not access memory, then we'll still
1206 // know that about the inlined clone of this call site, and we don't
1207 // need to add metadata.
1208 if (Call->doesNotAccessMemory())
1209 continue;
1210
1211 IsFuncCall = true;
1212 if (CalleeAAR) {
1213 MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
1214
1215 // We'll retain this knowledge without additional metadata.
1216 if (ME.onlyAccessesInaccessibleMem())
1217 continue;
1218
1219 if (ME.onlyAccessesArgPointees())
1220 IsArgMemOnlyCall = true;
1221 }
1222
1223 for (Value *Arg : Call->args()) {
1224 // Only care about pointer arguments. If a noalias argument is
1225 // accessed through a non-pointer argument, it must be captured
1226 // first (e.g. via ptrtoint), and we protect against captures below.
1227 if (!Arg->getType()->isPointerTy())
1228 continue;
1229
1230 PtrArgs.push_back(Elt: Arg);
1231 }
1232 }
1233
      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, then we might just alias with none of the
      // noalias arguments.
1238 if (PtrArgs.empty() && !IsFuncCall)
1239 continue;
1240
1241 // It is possible that there is only one underlying object, but you
1242 // need to go through several PHIs to see it, and thus could be
1243 // repeated in the Objects list.
1244 SmallPtrSet<const Value *, 4> ObjSet;
1245 SmallVector<Metadata *, 4> Scopes, NoAliases;
1246
1247 for (const Value *V : PtrArgs) {
1248 SmallVector<const Value *, 4> Objects;
1249 getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
1250
1251 ObjSet.insert_range(R&: Objects);
1252 }
1253
1254 // Figure out if we're derived from anything that is not a noalias
1255 // argument.
1256 bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
1257 UsesUnknownObject = false;
1258 for (const Value *V : ObjSet) {
1259 // Is this value a constant that cannot be derived from any pointer
1260 // value (we need to exclude constant expressions, for example, that
1261 // are formed from arithmetic on global symbols).
1262 bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) ||
1263 isa<ConstantPointerNull>(Val: V) ||
1264 isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V);
1265 if (IsNonPtrConst)
1266 continue;
1267
1268 // If this is anything other than a noalias argument, then we cannot
1269 // completely describe the aliasing properties using alias.scope
1270 // metadata (and, thus, won't add any).
1271 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
1272 if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias))
1273 UsesAliasingPtr = true;
1274 } else {
1275 UsesAliasingPtr = true;
1276 }
1277
1278 if (isEscapeSource(V)) {
1279 // An escape source can only alias with a noalias argument if it has
1280 // been captured beforehand.
1281 RequiresNoCaptureBefore = true;
1282 } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) {
1283 // If this is neither an escape source, nor some identified object
1284 // (which cannot directly alias a noalias argument), nor some other
1285 // argument (which, by definition, also cannot alias a noalias
1286 // argument), conservatively do not make any assumptions.
1287 UsesUnknownObject = true;
1288 }
1289 }
1290
1291 // Nothing we can do if the used underlying object cannot be reliably
1292 // determined.
1293 if (UsesUnknownObject)
1294 continue;
1295
1296 // A function call can always get captured noalias pointers (via other
1297 // parameters, globals, etc.).
1298 if (IsFuncCall && !IsArgMemOnlyCall)
1299 RequiresNoCaptureBefore = true;
1300
      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      //  1. The noalias argument is not in the set of objects from which we
      //     definitely derive.
      //  2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
1309 for (const Argument *A : NoAliasArgs) {
1310 if (ObjSet.contains(Ptr: A))
1311 continue; // May be based on a noalias argument.
1312
1313 // It might be tempting to skip the PointerMayBeCapturedBefore check if
1314 // A->hasNoCaptureAttr() is true, but this is incorrect because
1315 // nocapture only guarantees that no copies outlive the function, not
1316 // that the value cannot be locally captured.
1317 if (!RequiresNoCaptureBefore ||
1318 !capturesAnything(CC: PointerMayBeCapturedBefore(
1319 V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false,
1320 Mask: CaptureComponents::Provenance)))
1321 NoAliases.push_back(Elt: NewScopes[A]);
1322 }
1323
1324 if (!NoAliases.empty())
1325 NI->setMetadata(KindID: LLVMContext::MD_noalias,
1326 Node: MDNode::concatenate(
1327 A: NI->getMetadata(KindID: LLVMContext::MD_noalias),
1328 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases)));
1329
1330 // Next, we want to figure out all of the sets to which we might belong.
1331 // We might belong to a set if the noalias argument is in the set of
1332 // underlying objects. If there is some non-noalias argument in our list
1333 // of underlying objects, then we cannot add a scope because the fact
1334 // that some access does not alias with any set of our noalias arguments
1335 // cannot itself guarantee that it does not alias with this access
1336 // (because there is some pointer of unknown origin involved and the
1337 // other access might also depend on this pointer). We also cannot add
1338 // scopes to arbitrary functions unless we know they don't access any
1339 // non-parameter pointer-values.
1340 bool CanAddScopes = !UsesAliasingPtr;
1341 if (CanAddScopes && IsFuncCall)
1342 CanAddScopes = IsArgMemOnlyCall;
1343
1344 if (CanAddScopes)
1345 for (const Argument *A : NoAliasArgs) {
1346 if (ObjSet.count(Ptr: A))
1347 Scopes.push_back(Elt: NewScopes[A]);
1348 }
1349
1350 if (!Scopes.empty())
1351 NI->setMetadata(
1352 KindID: LLVMContext::MD_alias_scope,
1353 Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope),
1354 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes)));
1355 }
1356 }
1357}
1358
1359static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1360 ReturnInst *End) {
1361
1362 assert(Begin->getParent() == End->getParent() &&
1363 "Expected to be in same basic block!");
1364 auto BeginIt = Begin->getIterator();
1365 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1366 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1367 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1368}
1369
1370 // Add attributes from CB's parameters and function attributes that can always
1371 // be propagated to the corresponding arguments / inner call bases.
1372static void AddParamAndFnBasicAttributes(const CallBase &CB,
1373 ValueToValueMapTy &VMap,
1374 ClonedCodeInfo &InlinedFunctionInfo) {
1375 auto *CalledFunction = CB.getCalledFunction();
1376 auto &Context = CalledFunction->getContext();
1377
1378 // Collect valid attributes for all params.
1379 SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
1380 bool HasAttrToPropagate = false;
1381
1382 // Attributes we can only propagate if the exact parameter is forwarded.
1383 // We can propagate both poison generating and UB generating attributes
1384 // without any extra checks. The only attribute that is tricky to propagate
1385 // is `noundef` (skipped for now) as that can create new UB where previous
1386 // behavior was just using a poison value.
1387 static const Attribute::AttrKind ExactAttrsToPropagate[] = {
1388 Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
1389 Attribute::NonNull, Attribute::NoFPClass,
1390 Attribute::Alignment, Attribute::Range};
1391
1392 for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1393 ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1394 ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1395 // Access attributes can be propagated to any param with the same underlying
1396 // object as the argument.
1397 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone))
1398 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone);
1399 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly))
1400 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly);
1401
1402 for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
1403 Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK);
1404 if (Attr.isValid())
1405 ValidExactParamAttrs.back().addAttribute(A: Attr);
1406 }
1407
1408 HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
1409 HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
1410 }
1411
1412 // Won't be able to propagate anything.
1413 if (!HasAttrToPropagate)
1414 return;
1415
1416 for (BasicBlock &BB : *CalledFunction) {
1417 for (Instruction &Ins : BB) {
1418 const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins);
1419 if (!InnerCB)
1420 continue;
1421 auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB));
1422 if (!NewInnerCB)
1423 continue;
1424 // The InnerCB might have been simplified during the inlining
1425 // process, which can make propagation incorrect.
1426 if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB))
1427 continue;
1428
1429 AttributeList AL = NewInnerCB->getAttributes();
1430 for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1431 // It's unsound or requires special handling to propagate
1432 // attributes to byval arguments. Even if CalledFunction
1433 // doesn't e.g. write to the argument (readonly), the call to
1434 // NewInnerCB may write to its by-value copy.
1435 if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal))
1436 continue;
1437
1438 // Don't bother propagating attrs to constants.
1439 if (match(V: NewInnerCB->getArgOperand(i: I),
1440 P: llvm::PatternMatch::m_ImmConstant()))
1441 continue;
1442
1443 // Check if the underlying value for the parameter is an argument.
1444 const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I));
1445 unsigned ArgNo;
1446 if (Arg) {
1447 ArgNo = Arg->getArgNo();
1448 // For dereferenceable, dereferenceable_or_null, align, etc...
1449 // we don't want to propagate if the existing param already has the
1450 // same attribute with "better" constraints. So remove the attribute
1451 // from the set to propagate (NewAB) if the existing param's region is
1452 // larger than what we can propagate.
1453 AttrBuilder NewAB{
1454 Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])};
1455 if (AL.getParamDereferenceableBytes(Index: I) >
1456 NewAB.getDereferenceableBytes())
1457 NewAB.removeAttribute(Val: Attribute::Dereferenceable);
1458 if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) >
1459 NewAB.getDereferenceableOrNullBytes())
1460 NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1461 if (AL.getParamAlignment(ArgNo: I).valueOrOne() >
1462 NewAB.getAlignment().valueOrOne())
1463 NewAB.removeAttribute(Val: Attribute::Alignment);
1464 if (auto ExistingRange = AL.getParamRange(ArgNo: I)) {
1465 if (auto NewRange = NewAB.getRange()) {
1466 ConstantRange CombinedRange =
1467 ExistingRange->intersectWith(CR: *NewRange);
1468 NewAB.removeAttribute(Val: Attribute::Range);
1469 NewAB.addRangeAttr(CR: CombinedRange);
1470 }
1471 }
1472
1473 if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I))
1474 NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass());
1475
1476 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB);
1477 } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) {
1478 // Check if the underlying value for the parameter is an argument.
1479 const Value *UnderlyingV =
1480 getUnderlyingObject(V: InnerCB->getArgOperand(i: I));
1481 Arg = dyn_cast<Argument>(Val: UnderlyingV);
1482 if (!Arg)
1483 continue;
1484 ArgNo = Arg->getArgNo();
1485 } else {
1486 continue;
1487 }
1488
1489 // If so, propagate its access attributes.
1490 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]);
1491
1492 // We can have conflicting attributes from the inner callsite and
1493 // to-be-inlined callsite. In that case, choose the most
1494 // restrictive.
1495
1496 // readonly + writeonly means we can never deref so make readnone.
1497 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) &&
1498 AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly))
1499 AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone);
1500
1501 // If we have readnone, we need to clear readonly/writeonly.
1502 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) {
1503 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly);
1504 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly);
1505 }
1506
1507 // Writable cannot exist in conjunction w/ readonly/readnone
1508 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) ||
1509 AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone))
1510 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable);
1511 }
1512 NewInnerCB->setAttributes(AL);
1513 }
1514 }
1515}
1516
1517 // Only allow these whitelisted attributes to be propagated back to the
1518// callee. This is because other attributes may only be valid on the call
1519// itself, i.e. attributes such as signext and zeroext.
1520
1521 // Attributes that are always okay to propagate, because violating them is
1522 // immediate UB.
1523static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1524 AttrBuilder Valid(CB.getContext());
1525 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1526 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1527 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1528 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1529 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1530 Valid.addAttribute(Val: Attribute::NoAlias);
1531 if (CB.hasRetAttr(Kind: Attribute::NoUndef))
1532 Valid.addAttribute(Val: Attribute::NoUndef);
1533 return Valid;
1534}
1535
1536// Attributes that need additional checks as propagating them may change
1537// behavior or cause new UB.
1538static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1539 AttrBuilder Valid(CB.getContext());
1540 if (CB.hasRetAttr(Kind: Attribute::NonNull))
1541 Valid.addAttribute(Val: Attribute::NonNull);
1542 if (CB.hasRetAttr(Kind: Attribute::Alignment))
1543 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1544 if (std::optional<ConstantRange> Range = CB.getRange())
1545 Valid.addRangeAttr(CR: *Range);
1546 return Valid;
1547}
1548
1549static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1550 ClonedCodeInfo &InlinedFunctionInfo) {
1551 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1552 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1553 if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1554 return;
1555 auto *CalledFunction = CB.getCalledFunction();
1556 auto &Context = CalledFunction->getContext();
1557
1558 for (auto &BB : *CalledFunction) {
1559 auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
1560 if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
1561 continue;
1562 auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
1563 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1564 // add the attributes on the cloned RetVal. Simplification during inlining
1565 // could have transformed the cloned instruction.
1566 auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal));
1567 if (!NewRetVal)
1568 continue;
1569
1570 // The RetVal might have been simplified during the inlining
1571 // process, which can make propagation incorrect.
1572 if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal))
1573 continue;
1574 // Backward propagation of attributes to the returned value may be incorrect
1575 // if it is control flow dependent.
1576 // Consider:
1577 // @callee {
1578 // %rv = call @foo()
1579 // %rv2 = call @bar()
1580 // if (%rv2 != null)
1581 // return %rv2
1582 // if (%rv == null)
1583 // exit()
1584 // return %rv
1585 // }
1586 // caller() {
1587 // %val = call nonnull @callee()
1588 // }
1589 // Here we cannot add the nonnull attribute on either foo or bar. So, we
1590 // limit the check to the case where RetVal and RI are in the same basic
1591 // block and there are no throwing/exiting instructions between them.
1592 if (RI->getParent() != RetVal->getParent() ||
1593 MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
1594 continue;
1595 // Add to the existing attributes of NewRetVal, i.e. the cloned call
1596 // instruction.
1597 // NB! When we have the same attribute already existing on NewRetVal, but
1598 // with a differing value, the AttributeList's merge API honours the already
1599 // existing attribute value (i.e. attributes such as dereferenceable,
1600 // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1601 AttributeList AL = NewRetVal->getAttributes();
1602 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1603 ValidUB.removeAttribute(Val: Attribute::Dereferenceable);
1604 if (ValidUB.getDereferenceableOrNullBytes() <
1605 AL.getRetDereferenceableOrNullBytes())
1606 ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1607 AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
1608 // Attributes that may generate poison returns are a bit tricky. If we
1609 // propagate them, other uses of the callsite might have their behavior
1610 // change or cause UB (if they have noundef) because of the new potential
1611 // poison.
1612 // Take the following three cases:
1613 //
1614 // 1)
1615 // define nonnull ptr @foo() {
1616 // %p = call ptr @bar()
1617 // call void @use(ptr %p) willreturn nounwind
1618 // ret ptr %p
1619 // }
1620 //
1621 // 2)
1622 // define noundef nonnull ptr @foo() {
1623 // %p = call ptr @bar()
1624 // call void @use(ptr %p) willreturn nounwind
1625 // ret ptr %p
1626 // }
1627 //
1628 // 3)
1629 // define nonnull ptr @foo() {
1630 // %p = call noundef ptr @bar()
1631 // ret ptr %p
1632 // }
1633 //
1634 // In case 1, we can't propagate nonnull because a poison value in @use may
1635 // change behavior or trigger UB.
1636 // In case 2, we don't need to be concerned about propagating nonnull, as
1637 // any new poison at @use will trigger UB anyways.
1638 // In case 3, we can never propagate nonnull because it may create UB due to
1639 // the noundef on @bar.
1640 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1641 ValidPG.removeAttribute(Val: Attribute::Alignment);
1642 if (ValidPG.hasAttributes()) {
1643 Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range);
1644 if (CBRange.isValid()) {
1645 Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range);
1646 if (NewRange.isValid()) {
1647 ValidPG.addRangeAttr(
1648 CR: CBRange.getRange().intersectWith(CR: NewRange.getRange()));
1649 }
1650 }
1651 // Three checks.
1652 // If the callsite has `noundef`, then a poison due to violating the
1653 // return attribute will create UB anyways so we can always propagate.
1654 // Otherwise, if the return value (callee to be inlined) has `noundef`, we
1655 // can't propagate as a new poison return will cause UB.
1656 // Finally, check if the return value has no uses whose behavior may
1657 // change/may cause UB if we potentially return poison. At the moment this
1658 // is implemented overly conservatively with a single-use check.
1659 // TODO: Update the single-use check to iterate through uses and only bail
1660 // if we have a potentially dangerous use.
1661
1662 if (CB.hasRetAttr(Kind: Attribute::NoUndef) ||
1663 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef)))
1664 NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
1665 }
1666 NewRetVal->setAttributes(NewAL);
1667 }
1668}
1669
1670/// If the inlined function has non-byval align arguments, then
1671/// add @llvm.assume-based alignment assumptions to preserve this information.
1672static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1673 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1674 return;
1675
1676 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1677 auto &DL = CB.getDataLayout();
1678
1679 // To avoid inserting redundant assumptions, we should check for assumptions
1680 // already in the caller. To do this, we might need a DT of the caller.
1681 DominatorTree DT;
1682 bool DTCalculated = false;
1683
1684 Function *CalledFunc = CB.getCalledFunction();
1685 for (Argument &Arg : CalledFunc->args()) {
1686 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1687 Arg.use_empty())
1688 continue;
1689 MaybeAlign Alignment = Arg.getParamAlign();
1690 if (!Alignment)
1691 continue;
1692
1693 if (!DTCalculated) {
1694 DT.recalculate(Func&: *CB.getCaller());
1695 DTCalculated = true;
1696 }
1697 // If we can already prove the asserted alignment in the context of the
1698 // caller, then don't bother inserting the assumption.
1699 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1700 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1701 continue;
1702
1703 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1704 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1705 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1706 }
1707}
1708
1709static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1710 MaybeAlign SrcAlign, Module *M,
1711 BasicBlock *InsertBlock,
1712 InlineFunctionInfo &IFI,
1713 Function *CalledFunc) {
1714 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1715
1716 Value *Size =
1717 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1718
1719 Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout());
1720
1721 // Generate a memcpy with the correct alignments.
1722 CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
1723
1724 // The verifier requires that all calls of debug-info-bearing functions
1725 // from debug-info-bearing functions have a debug location (for inlining
1726 // purposes). Assign a dummy location to satisfy the constraint.
1727 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1728 if (DISubprogram *SP = CalledFunc->getSubprogram())
1729 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1730}
1731
1732/// When inlining a call site that has a byval argument,
1733/// we have to make the implicit memcpy explicit by adding it.
1734static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1735 Instruction *TheCall,
1736 const Function *CalledFunc,
1737 InlineFunctionInfo &IFI,
1738 MaybeAlign ByValAlignment) {
1739 Function *Caller = TheCall->getFunction();
1740 const DataLayout &DL = Caller->getDataLayout();
1741
1742 // If the called function is readonly, then it cannot mutate the caller's
1743 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1744 // temporary.
1745 if (CalledFunc->onlyReadsMemory()) {
1746 // If the byval argument has a specified alignment that is greater than the
1747 // known alignment of the passed-in pointer, then we either have to round up
1748 // the input pointer or give up on this transformation.
1749 if (ByValAlignment.valueOrOne() == 1)
1750 return Arg;
1751
1752 AssumptionCache *AC =
1753 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1754
1755 // If the pointer is already known to be sufficiently aligned, or if we can
1756 // round it up to a larger alignment, then we don't need a temporary.
1757 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1758 *ByValAlignment)
1759 return Arg;
1760
1761 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1762 // for code quality, but rarely happens and is required for correctness.
1763 }
1764
1765 // Create the alloca, using the DataLayout's preferred alignment for the type.
1766 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1767
1768 // If the byval had an alignment specified, we *must* use at least that
1769 // alignment, as it is required by the byval argument (and uses of the
1770 // pointer inside the callee).
1771 if (ByValAlignment)
1772 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1773
1774 AllocaInst *NewAlloca =
1775 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1776 nullptr, Alignment, Arg->getName());
1777 NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated());
1778 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1779 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1780
1781 // Uses of the argument in the function should use our new alloca
1782 // instead.
1783 return NewAlloca;
1784}
1785
1786// Check whether this Value is used by a lifetime intrinsic.
1787static bool isUsedByLifetimeMarker(Value *V) {
1788 for (User *U : V->users())
1789 if (isa<LifetimeIntrinsic>(Val: U))
1790 return true;
1791 return false;
1792}
1793
1794// Check whether the given alloca already has
1795// lifetime.start or lifetime.end intrinsics.
1796static bool hasLifetimeMarkers(AllocaInst *AI) {
1797 Type *Ty = AI->getType();
1798 Type *Int8PtrTy =
1799 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1800 if (Ty == Int8PtrTy)
1801 return isUsedByLifetimeMarker(V: AI);
1802
1803 // Do a scan to find all the casts to i8*.
1804 for (User *U : AI->users()) {
1805 if (U->getType() != Int8PtrTy) continue;
1806 if (U->stripPointerCasts() != AI) continue;
1807 if (isUsedByLifetimeMarker(V: U))
1808 return true;
1809 }
1810 return false;
1811}
1812
1813/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1814/// block. Allocas used in inalloca calls and allocas of dynamic array size
1815/// cannot be static.
1816 static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1817 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1818}
1819
1820/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1821/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1822static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1823 LLVMContext &Ctx,
1824 DenseMap<const MDNode *, MDNode *> &IANodes) {
1825 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1826 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1827 Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(),
1828 AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank());
1829}
1830
1831 /// Update inlined instructions' line numbers to encode the location where
1832 /// these instructions are inlined.
1833static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1834 Instruction *TheCall, bool CalleeHasDebugInfo) {
1835 if (!TheCall->getDebugLoc())
1836 return;
1837
1838 // Don't propagate the source location atom from the call to inlined nodebug
1839 // instructions, and avoid putting it in the InlinedAt field of inlined
1840 // not-nodebug instructions. FIXME: Possibly worth transferring/generating
1841 // an atom for the returned value, otherwise we miss stepping on inlined
1842 // nodebug functions (which is different to existing behaviour).
1843 DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom();
1844
1845 auto &Ctx = Fn->getContext();
1846 DILocation *InlinedAtNode = TheCallDL;
1847
1848 // Create a unique call site, not to be confused with any other call from the
1849 // same location.
1850 InlinedAtNode = DILocation::getDistinct(
1851 Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
1852 Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());
1853
1854 // Cache the inlined-at nodes as they're built so they are reused; without
1855 // this, every instruction's inlined-at chain would become distinct from
1856 // every other.
1857 DenseMap<const MDNode *, MDNode *> IANodes;
1858
1859 // Check if we are not generating inline line tables and want to use
1860 // the call site location instead.
1861 bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1862
1863 // Helper-util for updating the metadata attached to an instruction.
1864 auto UpdateInst = [&](Instruction &I) {
1865 // Loop metadata needs to be updated so that the start and end locs
1866 // reference inlined-at locations.
1867 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1868 &IANodes](Metadata *MD) -> Metadata * {
1869 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
1870 return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
1871 return MD;
1872 };
1873 updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);
1874
1875 if (!NoInlineLineTables)
1876 if (DebugLoc DL = I.getDebugLoc()) {
1877 DebugLoc IDL =
1878 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
1879 I.setDebugLoc(IDL);
1880 return;
1881 }
1882
1883 if (CalleeHasDebugInfo && !NoInlineLineTables)
1884 return;
1885
1886 // If the inlined instruction has no line number, or if inline info
1887 // is not being generated, make it look as if it originates from the call
1888 // location. This is important for ((__always_inline, __nodebug__))
1889 // functions which must use caller location for all instructions in their
1890 // function body.
1891
1892 // Don't update static allocas, as they may get moved later.
1893 if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
1894 if (allocaWouldBeStaticInEntry(AI))
1895 return;
1896
1897 // Do not force a debug loc for pseudo probes, since they do not need to
1898 // be debuggable, and also they are expected to have a zero/null dwarf
1899 // discriminator at this point which could be violated otherwise.
1900 if (isa<PseudoProbeInst>(Val: I))
1901 return;
1902
1903 I.setDebugLoc(TheCallDL);
1904 };
1905
1906 // Helper-util for updating debug-info records attached to instructions.
1907 auto UpdateDVR = [&](DbgRecord *DVR) {
1908 assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1909 if (NoInlineLineTables) {
1910 DVR->setDebugLoc(TheCallDL);
1911 return;
1912 }
1913 DebugLoc DL = DVR->getDebugLoc();
1914 DebugLoc IDL =
1915 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
1916 Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
1917 DVR->setDebugLoc(IDL);
1918 };
1919
1920 // Iterate over all instructions, updating metadata and debug-info records.
1921 for (; FI != Fn->end(); ++FI) {
1922 for (Instruction &I : *FI) {
1923 UpdateInst(I);
1924 for (DbgRecord &DVR : I.getDbgRecordRange()) {
1925 UpdateDVR(&DVR);
1926 }
1927 }
1928
1929 // Remove debug info records if we're not keeping inline info.
1930 if (NoInlineLineTables) {
1931 BasicBlock::iterator BI = FI->begin();
1932 while (BI != FI->end()) {
1933 BI->dropDbgRecords();
1934 ++BI;
1935 }
1936 }
1937 }
1938}
1939
1940#undef DEBUG_TYPE
1941#define DEBUG_TYPE "assignment-tracking"
1942/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1943static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1944 const CallBase &CB) {
1945 at::StorageToVarsMap EscapedLocals;
1946 SmallPtrSet<const Value *, 4> SeenBases;
1947
1948 LLVM_DEBUG(
1949 errs() << "# Finding caller local variables escaped by callee\n");
1950 for (const Value *Arg : CB.args()) {
1951 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1952 if (!Arg->getType()->isPointerTy()) {
1953 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1954 continue;
1955 }
1956
1957 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
1958 if (!I) {
1959 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1960 continue;
1961 }
1962
1963 // Walk back to the base storage.
1964 assert(Arg->getType()->isPtrOrPtrVectorTy());
1965 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
1966 const AllocaInst *Base = dyn_cast<AllocaInst>(
1967 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
1968 if (!Base) {
1969 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1970 continue;
1971 }
1972
1973 assert(Base);
1974 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1975 // We only need to process each base address once - skip any duplicates.
1976 if (!SeenBases.insert(Ptr: Base).second)
1977 continue;
1978
1979 // Find all local variables associated with the backing storage.
1980 auto CollectAssignsForStorage = [&](DbgVariableRecord *DbgAssign) {
1981 // Skip variables from inlined functions - they are not local variables.
1982 if (DbgAssign->getDebugLoc().getInlinedAt())
1983 return;
1984 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1985 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
1986 };
1987 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1988 }
1989 return EscapedLocals;
1990}
1991
1992static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1993 const CallBase &CB) {
1994 LLVM_DEBUG(errs() << "trackInlinedStores into "
1995 << Start->getParent()->getName() << " from "
1996 << CB.getCalledFunction()->getName() << "\n");
1997 const DataLayout &DL = CB.getDataLayout();
1998 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL);
1999}
2000
2001/// Update inlined instructions' DIAssignID metadata. We need to do this
2002/// otherwise a function inlined more than once into the same function
2003/// will cause DIAssignID to be shared by many instructions.
2004static void fixupAssignments(Function::iterator Start, Function::iterator End) {
2005 DenseMap<DIAssignID *, DIAssignID *> Map;
2006 // Loop over all the inlined instructions. If we find a DIAssignID
2007 // attachment or use, replace it with a new version.
2008 for (auto BBI = Start; BBI != End; ++BBI) {
2009 for (Instruction &I : *BBI)
2010 at::remapAssignID(Map, I);
2011 }
2012}
2013#undef DEBUG_TYPE
2014#define DEBUG_TYPE "inline-function"
2015
2016/// Update the block frequencies of the caller after a callee has been inlined.
2017///
2018/// Each block cloned into the caller has its block frequency scaled by the
2019/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
2020/// callee's entry block gets the same frequency as the callsite block and the
2021/// relative frequencies of all cloned blocks remain the same after cloning.
2022static void updateCallerBFI(BasicBlock *CallSiteBlock,
2023 const ValueToValueMapTy &VMap,
2024 BlockFrequencyInfo *CallerBFI,
2025 BlockFrequencyInfo *CalleeBFI,
2026 const BasicBlock &CalleeEntryBlock) {
2027 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
2028 for (auto Entry : VMap) {
2029 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
2030 continue;
2031 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
2032 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
2033 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
2034 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
2035 // Multiple blocks in the callee might get mapped to one cloned block in
2036 // the caller since we prune the callee as we clone it. When that happens,
2037 // we want to use the maximum among the original blocks' frequencies.
2038 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
2039 if (NewFreq > Freq)
2040 Freq = NewFreq;
2041 }
2042 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
2043 }
2044 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
2045 CallerBFI->setBlockFreqAndScale(
2046 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
2047}
2048
2049/// Update the branch metadata for cloned call instructions.
2050static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
2051 const ProfileCount &CalleeEntryCount,
2052 const CallBase &TheCall, ProfileSummaryInfo *PSI,
2053 BlockFrequencyInfo *CallerBFI) {
2054 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
2055 return;
2056 auto CallSiteCount =
2057 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
2058 int64_t CallCount =
2059 std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount());
2060 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
2061}
2062
2063void llvm::updateProfileCallee(
2064 Function *Callee, int64_t EntryDelta,
2065 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
2066 auto CalleeCount = Callee->getEntryCount();
2067 if (!CalleeCount)
2068 return;
2069
2070 const uint64_t PriorEntryCount = CalleeCount->getCount();
2071
2072 // Since CallSiteCount is an estimate, it could exceed the original callee
2073 // count; in that case the new count is clamped to 0 to guard against underflow.
2074 const uint64_t NewEntryCount =
2075 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
2076 ? 0
2077 : PriorEntryCount + EntryDelta;
2078
2079 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
2080 const uint64_t PriorEntryCount) {
2081 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
2082 if (VPtr)
2083 scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount);
2084 };
2085
2086 // Are we updating the profile as part of inlining (i.e. was a VMap provided)?
2087 if (VMap) {
2088 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
2089 for (auto Entry : *VMap) {
2090 if (isa<CallInst>(Val: Entry.first))
2091 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) {
2092 CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2093 updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
2094 }
2095
2096 if (isa<InvokeInst>(Val: Entry.first))
2097 if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) {
2098 II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2099 updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2100 }
2101 }
2102 }
2103
2104 if (EntryDelta) {
2105 Callee->setEntryCount(Count: NewEntryCount);
2106
2107 for (BasicBlock &BB : *Callee)
2108 // No need to update the callsite if it is pruned during inlining.
2109 if (!VMap || VMap->count(Val: &BB))
2110 for (Instruction &I : BB) {
2111 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2112 CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2113 updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2114 }
2115 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) {
2116 II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2117 updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2118 }
2119 }
2120 }
2121}
2122
2123/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2124/// result is implicitly consumed by a call to retainRV or claimRV immediately
2125/// after the call. This function inlines the retainRV/claimRV calls.
2126///
2127/// There are three cases to consider:
2128///
2129/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2130/// object in the callee return block, the autoreleaseRV call and the
2131/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2132/// is a claimRV call, a call to objc_release is emitted.
2133///
2134/// 2. If there is a call in the callee return block that doesn't have operand
2135/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2136/// is transferred to the call in the callee.
2137///
2138/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2139/// a retainRV call.
2140static void
2141inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2142 const SmallVectorImpl<ReturnInst *> &Returns) {
2143 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2144 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2145 IsUnsafeClaimRV = !IsRetainRV;
2146
2147 for (auto *RI : Returns) {
2148 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
2149 bool InsertRetainCall = IsRetainRV;
2150 IRBuilder<> Builder(RI->getContext());
2151
2152 // Walk backwards through the basic block looking for either a matching
2153 // autoreleaseRV call or an unannotated call.
2154 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
2155 y: RI->getParent()->rend());
2156 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
2157 // Ignore casts.
2158 if (isa<CastInst>(Val: I))
2159 continue;
2160
2161 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
2162 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2163 !II->use_empty() ||
2164 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
2165 break;
2166
2167 // If we've found a matching autoreleaseRV call:
2168 // - If claimRV is attached to the call, insert a call to objc_release
2169 // and erase the autoreleaseRV call.
2170 // - If retainRV is attached to the call, just erase the autoreleaseRV
2171 // call.
2172 if (IsUnsafeClaimRV) {
2173 Builder.SetInsertPoint(II);
2174 Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd);
2175 }
2176 II->eraseFromParent();
2177 InsertRetainCall = false;
2178 break;
2179 }
2180
2181 auto *CI = dyn_cast<CallInst>(Val: &I);
2182
2183 if (!CI)
2184 break;
2185
2186 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
2187 objcarc::hasAttachedCallOpBundle(CB: CI))
2188 break;
2189
2190 // If we've found an unannotated call that defines RetOpnd, add a
2191 // "clang.arc.attachedcall" operand bundle.
2192 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2193 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2194 auto *NewCall = CallBase::addOperandBundle(
2195 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2196 NewCall->copyMetadata(SrcInst: *CI);
2197 CI->replaceAllUsesWith(V: NewCall);
2198 CI->eraseFromParent();
2199 InsertRetainCall = false;
2200 break;
2201 }
2202
2203 if (InsertRetainCall) {
2204 // The retainRV is attached to the call and we've failed to find a
2205 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2206 // to objc_retain.
2207 Builder.SetInsertPoint(RI);
2208 Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd);
2209 }
2210 }
2211}
2212
2213// In contextual profiling, when an inline succeeds, we want to remap the
2214// indices of the callee into the index space of the caller. We can't just leave
2215// them as-is because the same callee may appear in other places in this caller
2216// (other callsites), and its (callee's) counters and sub-contextual profile
2217// tree would be potentially different.
2218// Not all BBs of the callee may survive the opportunistic DCE InlineFunction
2219// does (same goes for callsites in the callee).
2220// We will return a pair of vectors, one for basic block IDs and one for
2221// callsites. For such a vector V, V[Idx] will be -1 if the callee
2222// instrumentation with index Idx did not survive inlining, and a new value
2223// otherwise.
2224// This function will update the caller's instrumentation intrinsics
2225// accordingly, mapping indices as described above. We also replace the "name"
2226// operand because we use it to distinguish between "own" instrumentation and
2227// "from callee" instrumentation when performing the traversal of the CFG of the
2228// caller. We traverse depth-first from the callsite's BB and up to the point we
2229// hit BBs owned by the caller.
2230// The return values will be then used to update the contextual
2231// profile. Note: we only update the "name" and "index" operands in the
2232 // instrumentation intrinsics; we leave the hash and total number of indices
2233 // as-is, since it's not worth updating those.
2234static std::pair<std::vector<int64_t>, std::vector<int64_t>>
2235remapIndices(Function &Caller, BasicBlock *StartBB,
2236 PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
2237 uint32_t CalleeCallsites) {
2238 // We'll allocate a new ID to imported callsite counters and callsites. We're
2239 // using -1 to indicate a counter we delete. Most likely the entry ID, for
2240 // example, will be deleted - we don't want 2 IDs in the same BB, and the
2241 // entry would have been cloned in the callsite's old BB.
2242 std::vector<int64_t> CalleeCounterMap;
2243 std::vector<int64_t> CalleeCallsiteMap;
2244 CalleeCounterMap.resize(new_size: CalleeCounters, x: -1);
2245 CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1);
2246
2247 auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
2248 if (Ins.getNameValue() == &Caller)
2249 return false;
2250 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2251 if (CalleeCounterMap[OldID] == -1)
2252 CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller);
2253 const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);
2254
2255 Ins.setNameValue(&Caller);
2256 Ins.setIndex(NewID);
2257 return true;
2258 };
2259
2260 auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool {
2261 if (Ins.getNameValue() == &Caller)
2262 return false;
2263 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2264 if (CalleeCallsiteMap[OldID] == -1)
2265 CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller);
2266 const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);
2267
2268 Ins.setNameValue(&Caller);
2269 Ins.setIndex(NewID);
2270 return true;
2271 };
2272
2273 std::deque<BasicBlock *> Worklist;
2274 DenseSet<const BasicBlock *> Seen;
2275 // We will traverse the BBs starting from the callsite BB. The callsite BB
2276 // will have at least a BB ID - maybe its own, and in any case the one coming
2277 // from the cloned function's entry BB. The other BBs we'll start seeing from
2278 // there on may or may not have BB IDs. BBs with IDs belonging to our caller
2279 // are definitely not coming from the imported function and form a boundary
2280 // past which we don't need to traverse anymore. BBs may have no
2281 // instrumentation (because we originally inserted instrumentation as per
2282 // MST), in which case we'll traverse past them. An invariant we'll keep is
2283 // that a BB will have at most 1 BB ID. For example, in the callsite BB, we
2284 // will delete the callee BB's instrumentation. This doesn't result in
2285 // information loss: the entry BB of the callee will have the same count as
2286 // the callsite's BB. At the end of this traversal, all the callee's
2287 // instrumentation would be mapped into the caller's instrumentation index
2288 // space. Some of the callee's counters may be deleted (as mentioned, this
2289 // should result in no loss of information).
2290 Worklist.push_back(x: StartBB);
2291 while (!Worklist.empty()) {
2292 auto *BB = Worklist.front();
2293 Worklist.pop_front();
2294 bool Changed = false;
2295 auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB);
2296 if (BBID) {
2297 Changed |= RewriteInstrIfNeeded(*BBID);
2298 // This may be the entry block from the inlined callee, coming into a BB
2299 // that didn't have instrumentation because of MST decisions. Let's make
2300 // sure it's placed accordingly. This is a no-op elsewhere.
2301 BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
2302 }
2303 for (auto &I : llvm::make_early_inc_range(Range&: *BB)) {
2304 if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) {
2305 if (isa<InstrProfIncrementInstStep>(Val: Inc)) {
2306 // Step instrumentation is used for select instructions. Inlining may
2307 // have propagated a constant that resolves the condition of the select,
2308 // in which case function cloning resolves the value of the select and
2309 // elides the select instruction. If that is the case, the step
2310 // parameter of the instrumentation will reflect it, and we can delete
2311 // the instrumentation.
2312 if (isa<Constant>(Val: Inc->getStep())) {
2313 assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
2314 Inc->eraseFromParent();
2315 } else {
2316 assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
2317 RewriteInstrIfNeeded(*Inc);
2318 }
2319 } else if (Inc != BBID) {
2320 // If we're here it means that the BB had more than one ID, presumably
2321 // some coming from the callee. We "made up our mind" to keep the
2322 // first one (which may or may not have been originally the caller's).
2323 // All the others are superfluous and we delete them.
2324 Inc->eraseFromParent();
2325 Changed = true;
2326 }
2327 } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) {
2328 Changed |= RewriteCallsiteInsIfNeeded(*CS);
2329 }
2330 }
2331 if (!BBID || Changed)
2332 for (auto *Succ : successors(BB))
2333 if (Seen.insert(V: Succ).second)
2334 Worklist.push_back(x: Succ);
2335 }
2336
2337 assert(!llvm::is_contained(CalleeCounterMap, 0) &&
2338 "Counter index mapping should be either to -1 or to non-zero index, "
2339 "because the 0 "
2340 "index corresponds to the entry BB of the caller");
2341 assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
2342 "Callsite index mapping should be either to -1 or to non-zero index, "
2343 "because there should have been at least a callsite - the inlined one "
2344 "- which would have had a 0 index.");
2345
2346 return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
2347}
2348
2349// Inline. If successful, update the contextual profile (if a valid one is
2350// given).
2351// The contextual profile data is organized in trees, as follows:
2352// - each node corresponds to a function
2353// - the root of each tree corresponds to an "entrypoint" - e.g.
2354// RPC handler for server side
2355// - the path from the root to a node is a particular call path
2356// - the counters stored in a node are counter values observed in that
2357// particular call path ("context")
2358// - the edges between nodes are annotated with callsite IDs.
2359//
2360// Updating the contextual profile after an inlining means, at a high level,
2361// copying over the data of the callee, **intentionally without any value
2362// scaling**, and copying over the callees of the inlined callee.
2363llvm::InlineResult llvm::InlineFunction(
2364 CallBase &CB, InlineFunctionInfo &IFI, PGOContextualProfile &CtxProf,
2365 bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime,
2366 Function *ForwardVarArgsTo, OptimizationRemarkEmitter *ORE) {
2367 if (!CtxProf.isInSpecializedModule())
2368 return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2369 ForwardVarArgsTo, ORE);
2370
2371 auto &Caller = *CB.getCaller();
2372 auto &Callee = *CB.getCalledFunction();
2373 auto *StartBB = CB.getParent();
2374
2375 // Get some preliminary data about the callsite before it might get inlined.
2376 // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to
2377 // get this data upfront and rely less on InlineFunction's behavior.
2378 const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee);
2379 auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
2380 const auto CallsiteID =
2381 static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());
2382
2383 const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee);
2384 const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee);
2385
2386 auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2387 ForwardVarArgsTo, ORE);
2388 if (!Ret.isSuccess())
2389 return Ret;
2390
2391 // Inlining succeeded, we don't need the instrumentation of the inlined
2392 // callsite.
2393 CallsiteIDIns->eraseFromParent();
2394
2395 // Assigning the maps and then capturing references to them in the lambda,
2396 // because captured structured bindings are a C++20 extension. We do also
2397 // need a capture here, though.
2398 const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf,
2399 CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites);
2400 const uint32_t NewCountersSize = CtxProf.getNumCounters(F: Caller);
2401
2402 auto Updater = [&](PGOCtxProfContext &Ctx) {
2403 assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
2404 const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps;
2405 assert(
2406 (Ctx.counters().size() +
2407 llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
2408 NewCountersSize) &&
2409 "The caller's counters size should have grown by the number of new "
2410 "distinct counters inherited from the inlined callee.");
2411 Ctx.resizeCounters(Size: NewCountersSize);
2412 // If the callsite wasn't exercised in this context, the value of the
2413 // counters coming from it is 0 - which it is right now, after resizing them
2414 // - and so we're done.
2415 auto CSIt = Ctx.callsites().find(x: CallsiteID);
2416 if (CSIt == Ctx.callsites().end())
2417 return;
2418 auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID);
2419 // The callsite was exercised, but not with this callee (so presumably this
2420 // is an indirect callsite). Again, we're done here.
2421 if (CalleeCtxIt == CSIt->second.end())
2422 return;
2423
2424 // Let's pull in the counter values and the subcontexts coming from the
2425 // inlined callee.
2426 auto &CalleeCtx = CalleeCtxIt->second;
2427 assert(CalleeCtx.guid() == CalleeGUID);
2428
2429 for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
2430 const int64_t NewIndex = CalleeCounterMap[I];
2431 if (NewIndex >= 0) {
2432 assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 "
2433 "index, that's the caller's entry BB");
2434 Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
2435 }
2436 }
2437 for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
2438 const int64_t NewCSIdx = CalleeCallsiteMap[I];
2439 if (NewCSIdx >= 0) {
2440 assert(NewCSIdx != 0 &&
2441 "callsite index mapping shouldn't happen to a 0 index, the "
2442 "caller must've had at least one callsite (with such an index)");
2443 Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet));
2444 }
2445 }
2446 // We know the traversal is preorder, so it wouldn't have yet looked at the
2447 // sub-contexts of this context that it's currently visiting. Meaning, the
2448 // erase below invalidates no iterators.
2449 auto Deleted = Ctx.callsites().erase(x: CallsiteID);
2450 assert(Deleted);
2451 (void)Deleted;
2452 };
2453 CtxProf.update(Updater, F: Caller);
2454 return Ret;
2455}
2456
2457llvm::InlineResult llvm::CanInlineCallSite(const CallBase &CB,
2458 InlineFunctionInfo &IFI) {
2459 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2460
2461 // FIXME: we don't inline callbr yet.
2462 if (isa<CallBrInst>(Val: CB))
2463 return InlineResult::failure(Reason: "We don't inline callbr yet.");
2464
2465 // If IFI has any state in it, zap it before we fill it in.
2466 IFI.reset();
2467
2468 Function *CalledFunc = CB.getCalledFunction();
2469 if (!CalledFunc || // Can't inline external function or indirect
2470 CalledFunc->isDeclaration()) // call!
2471 return InlineResult::failure(Reason: "external or indirect");
2472
2473 // The inliner does not know how to inline through calls with operand bundles
2474 // in general ...
2475 if (CB.hasOperandBundles()) {
2476 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2477 auto OBUse = CB.getOperandBundleAt(Index: i);
2478 uint32_t Tag = OBUse.getTagID();
2479 // ... but it knows how to inline through "deopt" operand bundles ...
2480 if (Tag == LLVMContext::OB_deopt)
2481 continue;
2482 // ... and "funclet" operand bundles.
2483 if (Tag == LLVMContext::OB_funclet)
2484 continue;
2485 if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2486 continue;
2487 if (Tag == LLVMContext::OB_kcfi)
2488 continue;
2489 if (Tag == LLVMContext::OB_convergencectrl) {
2490 IFI.ConvergenceControlToken = OBUse.Inputs[0].get();
2491 continue;
2492 }
2493
2494 return InlineResult::failure(Reason: "unsupported operand bundle");
2495 }
2496 }
2497
2498 // FIXME: The check below is redundant and incomplete. According to spec, if a
2499 // convergent call is missing a token, then the caller is using uncontrolled
2500 // convergence. If the callee has an entry intrinsic, then the callee is using
2501 // controlled convergence, and the call cannot be inlined. A proper
2502 // implementation of this check requires a whole new analysis that identifies
2503 // convergence in every function. For now, we skip that and just do this one
2504 // cursory check. The underlying assumption is that in a compiler flow that
2505 // fully implements convergence control tokens, there is no mixing of
2506 // controlled and uncontrolled convergent operations in the whole program.
2507 if (CB.isConvergent()) {
2508 if (!IFI.ConvergenceControlToken &&
2509 getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) {
2510 return InlineResult::failure(
2511 Reason: "convergent call needs convergencectrl operand");
2512 }
2513 }
2514
2515 const BasicBlock *OrigBB = CB.getParent();
2516 const Function *Caller = OrigBB->getParent();
2517
2518 // GC poses two hazards to inlining, which only occur when the callee has GC:
2519 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2520 // caller.
2521 // 2. If the caller has a differing GC, it is invalid to inline.
2522 if (CalledFunc->hasGC()) {
2523 if (Caller->hasGC() && CalledFunc->getGC() != Caller->getGC())
2524 return InlineResult::failure(Reason: "incompatible GC");
2525 }
2526
2527 // Get the personality function from the callee if it contains a landing pad.
2528 Constant *CalledPersonality =
2529 CalledFunc->hasPersonalityFn()
2530 ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2531 : nullptr;
2532
2533 // Find the personality function used by the landing pads of the caller. If it
2534 // exists, then check to see that it matches the personality function used in
2535 // the callee.
2536 Constant *CallerPersonality =
2537 Caller->hasPersonalityFn()
2538 ? Caller->getPersonalityFn()->stripPointerCasts()
2539 : nullptr;
2540 if (CalledPersonality) {
2541 // If the personality functions match, then we can perform the
2542 // inlining. Otherwise, we can't inline.
2543 // TODO: This isn't 100% true. Some personality functions are proper
2544 // supersets of others and can be used in place of the other.
2545 if (CallerPersonality && CalledPersonality != CallerPersonality)
2546 return InlineResult::failure(Reason: "incompatible personality");
2547 }
2548
2549 // We need to figure out which funclet the callsite was in so that we may
2550 // properly nest the callee.
2551 if (CallerPersonality) {
2552 EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
2553 if (isScopedEHPersonality(Pers: Personality)) {
2554 std::optional<OperandBundleUse> ParentFunclet =
2555 CB.getOperandBundle(ID: LLVMContext::OB_funclet);
2556 if (ParentFunclet)
2557 IFI.CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());
2558
2559 // OK, the inlining site is legal. What about the target function?
2560
2561 if (IFI.CallSiteEHPad) {
2562 if (Personality == EHPersonality::MSVC_CXX) {
2563 // The MSVC personality cannot tolerate catches getting inlined into
2564 // cleanup funclets.
2565 if (isa<CleanupPadInst>(Val: IFI.CallSiteEHPad)) {
2566 // Ok, the call site is within a cleanuppad. Let's check the callee
2567 // for catchpads.
2568 for (const BasicBlock &CalledBB : *CalledFunc) {
2569 if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt()))
2570 return InlineResult::failure(Reason: "catch in cleanup funclet");
2571 }
2572 }
2573 } else if (isAsynchronousEHPersonality(Pers: Personality)) {
2574 // SEH is even less tolerant; there may not be any sort of exceptional
2575 // funclet in the callee.
2576 for (const BasicBlock &CalledBB : *CalledFunc) {
2577 if (CalledBB.isEHPad())
2578 return InlineResult::failure(Reason: "SEH in cleanup funclet");
2579 }
2580 }
2581 }
2582 }
2583 }
2584
2585 return InlineResult::success();
2586}
2587
2588 /// This function inlines the called function into the basic block of the
2589 /// caller, assuming that CanInlineCallSite has already verified that the
2590 /// call can be inlined.
2591///
2592/// Note that this only does one level of inlining. For example, if the
2593/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2594/// exists in the instruction stream. Similarly this will inline a recursive
2595/// function by one level.
2596void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
2597 bool MergeAttributes, AAResults *CalleeAAR,
2598 bool InsertLifetime, Function *ForwardVarArgsTo,
2599 OptimizationRemarkEmitter *ORE) {
2600 BasicBlock *OrigBB = CB.getParent();
2601 Function *Caller = OrigBB->getParent();
2602 Function *CalledFunc = CB.getCalledFunction();
2603 assert(CalledFunc && !CalledFunc->isDeclaration() &&
2604 "CanInlineCallSite should have verified direct call to definition");
2605
2606 // Determine if we are dealing with a call in an EHPad which does not unwind
2607 // to caller.
2608 bool EHPadForCallUnwindsLocally = false;
2609 if (IFI.CallSiteEHPad && isa<CallInst>(Val: CB)) {
2610 UnwindDestMemoTy FuncletUnwindMap;
2611 Value *CallSiteUnwindDestToken =
2612 getUnwindDestToken(EHPad: IFI.CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2613
2614 EHPadForCallUnwindsLocally =
2615 CallSiteUnwindDestToken &&
2616 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2617 }
2618
2619 // Get an iterator to the last basic block in the function, which will have
2620 // the new function inlined after it.
2621 Function::iterator LastBlock = --Caller->end();
2622
2623 // Make sure to capture all of the return instructions from the cloned
2624 // function.
2625 SmallVector<ReturnInst*, 8> Returns;
2626 ClonedCodeInfo InlinedFunctionInfo;
2627 Function::iterator FirstNewBlock;
2628
2629 // GC poses two hazards to inlining, which only occur when the callee has GC:
2630 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2631 // caller.
2632 // 2. If the caller has a differing GC, it is invalid to inline.
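  // For illustration (hedged): if the callee uses gc "statepoint-example" and
  // the caller has no GC strategy, the caller becomes gc "statepoint-example"
  // here; a caller already using a different strategy would have been rejected
  // by CanInlineCallSite.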
2633 if (CalledFunc->hasGC()) {
2634 if (!Caller->hasGC())
2635 Caller->setGC(CalledFunc->getGC());
2636 else {
2637 assert(CalledFunc->getGC() == Caller->getGC() &&
2638 "CanInlineCallSite should have verified compatible GCs");
2639 }
2640 }
2641
2642 if (CalledFunc->hasPersonalityFn()) {
2643 Constant *CalledPersonality =
2644 CalledFunc->getPersonalityFn()->stripPointerCasts();
2645 if (!Caller->hasPersonalityFn()) {
2646 Caller->setPersonalityFn(CalledPersonality);
2647 } else
2648 assert(Caller->getPersonalityFn()->stripPointerCasts() ==
2649 CalledPersonality &&
2650 "CanInlineCallSite should have verified compatible personality");
2651 }
2652
2653 { // Scope to destroy VMap after cloning.
2654 ValueToValueMapTy VMap;
2655 struct ByValInit {
2656 Value *Dst;
2657 Value *Src;
2658 MaybeAlign SrcAlign;
2659 Type *Ty;
2660 };
    // Keep a list of (dst, src, src_align, byval type) entries to emit byval
    // initializations. The source alignment is only available through the
    // call base, so it has to be saved here.
2664 SmallVector<ByValInit, 4> ByValInits;
2665
2666 // When inlining a function that contains noalias scope metadata,
2667 // this metadata needs to be cloned so that the inlined blocks
2668 // have different "unique scopes" at every call site.
2669 // Track the metadata that must be cloned. Do this before other changes to
2670 // the function, so that we do not get in trouble when inlining caller ==
2671 // callee.
2672 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2673
2674 auto &DL = Caller->getDataLayout();
2675
2676 // Calculate the vector of arguments to pass into the function cloner, which
2677 // matches up the formal to the actual argument values.
2678 auto AI = CB.arg_begin();
2679 unsigned ArgNo = 0;
2680 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2681 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2682 Value *ActualArg = *AI;
2683
      // When byval arguments are actually inlined, we need to make the copy
      // they imply explicit. However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
2688 if (CB.isByValArgument(ArgNo)) {
2689 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2690 TheCall: &CB, CalledFunc, IFI,
2691 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2692 if (ActualArg != *AI)
2693 ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI,
2694 .SrcAlign: CB.getParamAlign(ArgNo),
2695 .Ty: CB.getParamByValType(ArgNo)});
2696 }
2697
2698 VMap[&*I] = ActualArg;
2699 }
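    // For illustration (hedged, simplified; the copy itself is emitted later by
    // HandleByValArgumentInit and the names here are hypothetical): a byval
    // argument such as
    //   call void @callee(ptr byval(%struct.S) align 4 %p)
    // conceptually becomes, in the caller,
    //   %p.copy = alloca %struct.S
    //   call void @llvm.memcpy.p0.p0.i64(ptr %p.copy, ptr %p, i64 <size of S>, i1 false)
    // with the inlined body reading from %p.copy instead of %p.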
2700
2701 // TODO: Remove this when users have been updated to the assume bundles.
2702 // Add alignment assumptions if necessary. We do this before the inlined
2703 // instructions are actually cloned into the caller so that we can easily
2704 // check what will be known at the start of the inlined code.
2705 AddAlignmentAssumptions(CB, IFI);
2706
2707 AssumptionCache *AC =
2708 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2709
    /// Preserve all attributes of the call and its parameters.
2711 salvageKnowledge(I: &CB, AC);
2712
2713 // We want the inliner to prune the code as it copies. We would LOVE to
2714 // have no dead or constant instructions leftover after inlining occurs
2715 // (which can happen, e.g., because an argument was constant), but we'll be
2716 // happy with whatever the cloner can do.
2717 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2718 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2719 CodeInfo: &InlinedFunctionInfo);
2720 // Remember the first block that is newly cloned over.
2721 FirstNewBlock = LastBlock; ++FirstNewBlock;
2722
    // Insert retainRV/claimRV runtime calls.
2724 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2725 if (RVCallKind != objcarc::ARCInstKind::None)
2726 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2727
    // Update caller/callee profiles only when requested. For sample loader
    // inlining, the context-sensitive inlinee profile doesn't need to be
    // subtracted from the callee profile, and the inlined clone also doesn't
    // need to be scaled based on call site count.
2732 if (IFI.UpdateProfile) {
2733 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2734 // Update the BFI of blocks cloned into the caller.
2735 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2736 CalleeEntryBlock: CalledFunc->front());
2737
2738 if (auto Profile = CalledFunc->getEntryCount())
2739 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2740 CallerBFI: IFI.CallerBFI);
2741 }
2742
2743 // Inject byval arguments initialization.
2744 for (ByValInit &Init : ByValInits)
2745 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign,
2746 M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI,
2747 CalledFunc);
2748
2749 std::optional<OperandBundleUse> ParentDeopt =
2750 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2751 if (ParentDeopt) {
2752 SmallVector<OperandBundleDef, 2> OpDefs;
2753
2754 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2755 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2756 if (!ICS)
2757 continue; // instruction was DCE'd or RAUW'ed to undef
2758
2759 OpDefs.clear();
2760
2761 OpDefs.reserve(N: ICS->getNumOperandBundles());
2762
2763 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2764 ++COBi) {
2765 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2766 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2767 // If the inlined call has other operand bundles, let them be
2768 OpDefs.emplace_back(Args&: ChildOB);
2769 continue;
2770 }
2771
2772 // It may be useful to separate this logic (of handling operand
2773 // bundles) out to a separate "policy" component if this gets crowded.
2774 // Prepend the parent's deoptimization continuation to the newly
2775 // inlined call's deoptimization continuation.
2776 std::vector<Value *> MergedDeoptArgs;
2777 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2778 ChildOB.Inputs.size());
2779
2780 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2781 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2782
2783 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2784 }
2785
2786 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2787
2788 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2789 // this even if the call returns void.
2790 ICS->replaceAllUsesWith(V: NewI);
2791
2792 VH = nullptr;
2793 ICS->eraseFromParent();
2794 }
2795 }
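    // For illustration (hedged, hypothetical operands): if the inlined call
    // site carried [ "deopt"(i32 1, i32 2) ] and a call inside the callee
    // carried [ "deopt"(i32 3) ], the rewritten call now carries
    // [ "deopt"(i32 1, i32 2, i32 3) ]: the parent's deopt continuation is
    // prepended to the child's.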
2796
2797 // For 'nodebug' functions, the associated DISubprogram is always null.
2798 // Conservatively avoid propagating the callsite debug location to
2799 // instructions inlined from a function whose DISubprogram is not null.
2800 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2801 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2802
2803 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2804 // Interpret inlined stores to caller-local variables as assignments.
2805 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2806
2807 // Update DIAssignID metadata attachments and uses so that they are
2808 // unique to this inlined instance.
2809 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2810 }
2811
2812 // Now clone the inlined noalias scope metadata.
2813 SAMetadataCloner.clone();
2814 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2815
2816 // Add noalias metadata if necessary.
2817 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2818
2819 // Clone return attributes on the callsite into the calls within the inlined
2820 // function which feed into its return value.
2821 AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2822
2823 // Clone attributes on the params of the callsite to calls within the
2824 // inlined function which use the same param.
2825 AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2826
2827 propagateMemProfMetadata(
2828 Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
2829
2830 // Propagate metadata on the callsite if necessary.
2831 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2832
2833 // Propagate implicit ref metadata.
2834 if (CalledFunc->hasMetadata(KindID: LLVMContext::MD_implicit_ref)) {
2835 SmallVector<MDNode *> MDs;
2836 CalledFunc->getMetadata(KindID: LLVMContext::MD_implicit_ref, MDs);
2837 for (MDNode *MD : MDs) {
2838 Caller->addMetadata(KindID: LLVMContext::MD_implicit_ref, MD&: *MD);
2839 }
2840 }
2841
2842 // Register any cloned assumptions.
2843 if (IFI.GetAssumptionCache)
2844 for (BasicBlock &NewBlock :
2845 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2846 for (Instruction &I : NewBlock)
2847 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2848 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2849 }
2850
2851 if (IFI.ConvergenceControlToken) {
2852 IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock);
2853 if (IntrinsicCall) {
2854 IntrinsicCall->replaceAllUsesWith(V: IFI.ConvergenceControlToken);
2855 IntrinsicCall->eraseFromParent();
2856 }
2857 }
2858
2859 // If there are any alloca instructions in the block that used to be the entry
2860 // block for the callee, move them to the entry block of the caller. First
2861 // calculate which instruction they should be inserted before. We insert the
2862 // instructions at the end of the current alloca list.
2863 {
2864 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2865 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2866 E = FirstNewBlock->end(); I != E; ) {
2867 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
2868 if (!AI) continue;
2869
2870 // If the alloca is now dead, remove it. This often occurs due to code
2871 // specialization.
2872 if (AI->use_empty()) {
2873 AI->eraseFromParent();
2874 continue;
2875 }
2876
2877 if (!allocaWouldBeStaticInEntry(AI))
2878 continue;
2879
2880 // Keep track of the static allocas that we inline into the caller.
2881 IFI.StaticAllocas.push_back(Elt: AI);
2882
2883 // Scan for the block of allocas that we can move over, and move them
2884 // all at once.
2885 while (isa<AllocaInst>(Val: I) &&
2886 !cast<AllocaInst>(Val&: I)->use_empty() &&
2887 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
2888 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
2889 ++I;
2890 }
2891
2892 // Transfer all of the allocas over in a block. Using splice means
2893 // that the instructions aren't removed from the symbol table, then
2894 // reinserted.
2895 I.setTailBit(true);
2896 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
2897 FromBeginIt: AI->getIterator(), FromEndIt: I);
2898 }
2899 }
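  // For illustration (hedged, hypothetical names): a static alloca cloned from
  // the callee's entry block, e.g.
  //   %buf.i = alloca [16 x i8]
  // is moved into the caller's entry block so that it keeps being treated as a
  // static stack allocation rather than a dynamic one.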
2900
2901 // If the call to the callee cannot throw, set the 'nounwind' flag on any
2902 // calls that we inline.
2903 bool MarkNoUnwind = CB.doesNotThrow();
2904
2905 SmallVector<Value*,4> VarArgsToForward;
2906 SmallVector<AttributeSet, 4> VarArgsAttrs;
2907 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2908 i < CB.arg_size(); i++) {
2909 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
2910 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
2911 }
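  // For illustration (hedged, hypothetical IR): for an inlined vararg call such
  // as
  //   call void (i32, ...) @vf(i32 1, i32 2, i32 3)
  // where @vf has one fixed parameter, the trailing operands (i32 2, i32 3) and
  // their parameter attributes are what get collected here for forwarding.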
2912
2913 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2914 if (InlinedFunctionInfo.ContainsCalls) {
2915 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2916 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
2917 CallSiteTailKind = CI->getTailCallKind();
2918
2919 // For inlining purposes, the "notail" marker is the same as no marker.
2920 if (CallSiteTailKind == CallInst::TCK_NoTail)
2921 CallSiteTailKind = CallInst::TCK_None;
2922
2923 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2924 ++BB) {
2925 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
2926 CallInst *CI = dyn_cast<CallInst>(Val: &I);
2927 if (!CI)
2928 continue;
2929
2930 // Forward varargs from inlined call site to calls to the
2931 // ForwardVarArgsTo function, if requested, and to musttail calls.
2932 if (!VarArgsToForward.empty() &&
2933 ((ForwardVarArgsTo &&
2934 CI->getCalledFunction() == ForwardVarArgsTo) ||
2935 CI->isMustTailCall())) {
2936 // Collect attributes for non-vararg parameters.
2937 AttributeList Attrs = CI->getAttributes();
2938 SmallVector<AttributeSet, 8> ArgAttrs;
2939 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2940 for (unsigned ArgNo = 0;
2941 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2942 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
2943 }
2944
2945 // Add VarArg attributes.
2946 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
2947 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
2948 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
2949 // Add VarArgs to existing parameters.
2950 SmallVector<Value *, 6> Params(CI->args());
2951 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
2952 CallInst *NewCI = CallInst::Create(
2953 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
2954 NewCI->setDebugLoc(CI->getDebugLoc());
2955 NewCI->setAttributes(Attrs);
2956 NewCI->setCallingConv(CI->getCallingConv());
2957 CI->replaceAllUsesWith(V: NewCI);
2958 CI->eraseFromParent();
2959 CI = NewCI;
2960 }
2961
2962 if (Function *F = CI->getCalledFunction())
2963 InlinedDeoptimizeCalls |=
2964 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2965
2966 // We need to reduce the strength of any inlined tail calls. For
2967 // musttail, we have to avoid introducing potential unbounded stack
2968 // growth. For example, if functions 'f' and 'g' are mutually recursive
2969 // with musttail, we can inline 'g' into 'f' so long as we preserve
2970 // musttail on the cloned call to 'f'. If either the inlined call site
2971 // or the cloned call site is *not* musttail, the program already has
2972 // one frame of stack growth, so it's safe to remove musttail. Here is
2973 // a table of example transformations:
2974 //
2975 // f -> musttail g -> musttail f ==> f -> musttail f
2976 // f -> musttail g -> tail f ==> f -> tail f
2977 // f -> g -> musttail f ==> f -> f
2978 // f -> g -> tail f ==> f -> f
2979 //
2980 // Inlined notail calls should remain notail calls.
2981 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2982 if (ChildTCK != CallInst::TCK_NoTail)
2983 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
2984 CI->setTailCallKind(ChildTCK);
2985 InlinedMustTailCalls |= CI->isMustTailCall();
2986
2987 // Call sites inlined through a 'nounwind' call site should be
2988 // 'nounwind' as well. However, avoid marking call sites explicitly
2989 // where possible. This helps expose more opportunities for CSE after
2990 // inlining, commonly when the callee is an intrinsic.
2991 if (MarkNoUnwind && !CI->doesNotThrow())
2992 CI->setDoesNotThrow();
2993 }
2994 }
2995 }
2996
  // Leave lifetime markers for the static allocas, scoping them to the
  // function we just inlined.
2999 // We need to insert lifetime intrinsics even at O0 to avoid invalid
3000 // access caused by multithreaded coroutines. The check
3001 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
3002 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
3003 !IFI.StaticAllocas.empty()) {
3004 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
3005 for (AllocaInst *AI : IFI.StaticAllocas) {
3006 // Don't mark swifterror allocas. They can't have bitcast uses.
3007 if (AI->isSwiftError())
3008 continue;
3009
3010 // If the alloca is already scoped to something smaller than the whole
3011 // function then there's no need to add redundant, less accurate markers.
3012 if (hasLifetimeMarkers(AI))
3013 continue;
3014
3015 std::optional<TypeSize> Size = AI->getAllocationSize(DL: AI->getDataLayout());
3016 if (Size && Size->isZero())
3017 continue;
3018
3019 builder.CreateLifetimeStart(Ptr: AI);
3020 for (ReturnInst *RI : Returns) {
3021 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
3022 // call and a return. The return kills all local allocas.
3023 if (InlinedMustTailCalls &&
3024 RI->getParent()->getTerminatingMustTailCall())
3025 continue;
3026 if (InlinedDeoptimizeCalls &&
3027 RI->getParent()->getTerminatingDeoptimizeCall())
3028 continue;
3029 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI);
3030 }
3031 }
3032 }
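  // For illustration (hedged; exact intrinsic signatures elided): after this
  // step an inlined static alloca such as %a.i is bracketed roughly as
  //   call void @llvm.lifetime.start.p0(... %a.i)   ; at the inlined entry
  //   ...inlined body...
  //   call void @llvm.lifetime.end.p0(... %a.i)     ; before each return
  // except at returns preceded by a musttail or deoptimize call.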
3033
3034 // If the inlined code contained dynamic alloca instructions, wrap the inlined
3035 // code with llvm.stacksave/llvm.stackrestore intrinsics.
3036 if (InlinedFunctionInfo.ContainsDynamicAllocas) {
3037 // Insert the llvm.stacksave.
3038 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
3039 .CreateStackSave(Name: "savedstack");
3040
3041 // Insert a call to llvm.stackrestore before any return instructions in the
3042 // inlined function.
3043 for (ReturnInst *RI : Returns) {
3044 // Don't insert llvm.stackrestore calls between a musttail or deoptimize
3045 // call and a return. The return will restore the stack pointer.
3046 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
3047 continue;
3048 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
3049 continue;
3050 IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr);
3051 }
3052 }
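  // For illustration (hedged; address-space suffixes on the intrinsic names
  // elided): the inlined region is wrapped roughly as
  //   %savedstack = call ptr @llvm.stacksave()
  //   ...inlined body containing dynamic allocas...
  //   call void @llvm.stackrestore(ptr %savedstack)
  // so stack memory from the callee's dynamic allocas is reclaimed at each
  // return (again skipping returns after musttail or deoptimize calls).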
3053
3054 // If we are inlining for an invoke instruction, we must make sure to rewrite
3055 // any call instructions into invoke instructions. This is sensitive to which
3056 // funclet pads were top-level in the inlinee, so must be done before
3057 // rewriting the "parent pad" links.
3058 if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) {
3059 BasicBlock *UnwindDest = II->getUnwindDest();
3060 BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
3061 if (isa<LandingPadInst>(Val: FirstNonPHI)) {
3062 HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3063 } else {
3064 HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3065 }
3066 }
3067
3068 // Update the lexical scopes of the new funclets and callsites.
3069 // Anything that had 'none' as its parent is now nested inside the callsite's
3070 // EHPad.
3071 if (IFI.CallSiteEHPad) {
3072 for (Function::iterator BB = FirstNewBlock->getIterator(),
3073 E = Caller->end();
3074 BB != E; ++BB) {
3075 // Add bundle operands to inlined call sites.
3076 PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad: IFI.CallSiteEHPad);
3077
      // It is problematic if the inlinee has a cleanupret which unwinds to
      // caller and we inline it at a call site which does not itself unwind to
      // caller but sits inside an EH pad whose unwinding stays local. Such an
      // edge must be dynamically unreachable, so we replace the cleanupret
      // with unreachable.
3082 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator()))
3083 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
3084 changeToUnreachable(I: CleanupRet);
3085
3086 BasicBlock::iterator I = BB->getFirstNonPHIIt();
3087 if (!I->isEHPad())
3088 continue;
3089
3090 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
3091 if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad()))
3092 CatchSwitch->setParentPad(IFI.CallSiteEHPad);
3093 } else {
3094 auto *FPI = cast<FuncletPadInst>(Val&: I);
3095 if (isa<ConstantTokenNone>(Val: FPI->getParentPad()))
3096 FPI->setParentPad(IFI.CallSiteEHPad);
3097 }
3098 }
3099 }
3100
3101 if (InlinedDeoptimizeCalls) {
    // We need to at least remove the deoptimizing returns from the Returns
    // set, so that the control flow from those returns does not get merged
    // into the caller (but terminates it instead). If the caller's return type
    // does not match the callee's return type, we also need to change the
    // return type of the intrinsic.
3107 if (Caller->getReturnType() == CB.getType()) {
3108 llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) {
3109 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
3110 });
3111 } else {
3112 SmallVector<ReturnInst *, 8> NormalReturns;
3113 Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
3114 M: Caller->getParent(), id: Intrinsic::experimental_deoptimize,
3115 Tys: {Caller->getReturnType()});
3116
3117 for (ReturnInst *RI : Returns) {
3118 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
3119 if (!DeoptCall) {
3120 NormalReturns.push_back(Elt: RI);
3121 continue;
3122 }
3123
3124 // The calling convention on the deoptimize call itself may be bogus,
3125 // since the code we're inlining may have undefined behavior (and may
3126 // never actually execute at runtime); but all
3127 // @llvm.experimental.deoptimize declarations have to have the same
3128 // calling convention in a well-formed module.
3129 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
3130 NewDeoptIntrinsic->setCallingConv(CallingConv);
3131 auto *CurBB = RI->getParent();
3132 RI->eraseFromParent();
3133
3134 SmallVector<Value *, 4> CallArgs(DeoptCall->args());
3135
3136 SmallVector<OperandBundleDef, 1> OpBundles;
3137 DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles);
3138 auto DeoptAttributes = DeoptCall->getAttributes();
3139 DeoptCall->eraseFromParent();
3140 assert(!OpBundles.empty() &&
3141 "Expected at least the deopt operand bundle");
3142
3143 IRBuilder<> Builder(CurBB);
3144 CallInst *NewDeoptCall =
3145 Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles);
3146 NewDeoptCall->setCallingConv(CallingConv);
3147 NewDeoptCall->setAttributes(DeoptAttributes);
3148 if (NewDeoptCall->getType()->isVoidTy())
3149 Builder.CreateRetVoid();
3150 else
3151 Builder.CreateRet(V: NewDeoptCall);
3152 // Since the ret type is changed, remove the incompatible attributes.
3153 NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible(
3154 Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes()));
3155 }
3156
3157 // Leave behind the normal returns so we can merge control flow.
3158 std::swap(LHS&: Returns, RHS&: NormalReturns);
3159 }
3160 }
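  // For illustration (hedged, hypothetical types): if the callee returned i32
  // but the caller returns i64, an inlined deoptimizing exit
  //   %v = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
  //   ret i32 %v
  // is rewritten to call the i64 overload of the intrinsic and 'ret' its result
  // so the exit matches the caller's return type.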
3161
3162 // Handle any inlined musttail call sites. In order for a new call site to be
3163 // musttail, the source of the clone and the inlined call site must have been
3164 // musttail. Therefore it's safe to return without merging control into the
3165 // phi below.
3166 if (InlinedMustTailCalls) {
3167 // Check if we need to bitcast the result of any musttail calls.
3168 Type *NewRetTy = Caller->getReturnType();
3169 bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
3170
3171 // Handle the returns preceded by musttail calls separately.
3172 SmallVector<ReturnInst *, 8> NormalReturns;
3173 for (ReturnInst *RI : Returns) {
3174 CallInst *ReturnedMustTail =
3175 RI->getParent()->getTerminatingMustTailCall();
3176 if (!ReturnedMustTail) {
3177 NormalReturns.push_back(Elt: RI);
3178 continue;
3179 }
3180 if (!NeedBitCast)
3181 continue;
3182
3183 // Delete the old return and any preceding bitcast.
3184 BasicBlock *CurBB = RI->getParent();
3185 auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue());
3186 RI->eraseFromParent();
3187 if (OldCast)
3188 OldCast->eraseFromParent();
3189
3190 // Insert a new bitcast and return with the right type.
3191 IRBuilder<> Builder(CurBB);
3192 Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy));
3193 }
3194
3195 // Leave behind the normal returns so we can merge control flow.
3196 std::swap(LHS&: Returns, RHS&: NormalReturns);
3197 }
3198
  // Now that all of the transforms on the inlined code have taken place but
  // before we splice the inlined code into the CFG and lose track of which
  // blocks were actually inlined, collect the call sites. Calls to intrinsics
  // are not collected because they are not inlineable.
3205 if (InlinedFunctionInfo.ContainsCalls) {
    // Collect the raw call sites that were inlined.
3207 for (BasicBlock &NewBB :
3208 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
3209 for (Instruction &I : NewBB)
3210 if (auto *CB = dyn_cast<CallBase>(Val: &I))
3211 if (!(CB->getCalledFunction() &&
3212 CB->getCalledFunction()->isIntrinsic()))
3213 IFI.InlinedCallSites.push_back(Elt: CB);
3214 }
3215
3216 // If we cloned in _exactly one_ basic block, and if that block ends in a
3217 // return instruction, we splice the body of the inlined callee directly into
3218 // the calling basic block.
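  // For illustration (hedged, hypothetical IR): inlining a single-block callee
  //   define i32 @sq(i32 %x) {
  //     %m = mul i32 %x, %x
  //     ret i32 %m
  //   }
  // at '%r = call i32 @sq(i32 %v)' takes this fast path: the cloned multiply is
  // spliced in before the call, uses of %r are rewritten to the cloned result,
  // and both the call and the cloned return are erased.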
3219 if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) {
3220 // Move all of the instructions right before the call.
3221 OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(),
3222 FromEndIt: FirstNewBlock->end());
3223 // Remove the cloned basic block.
3224 Caller->back().eraseFromParent();
3225
3226 // If the call site was an invoke instruction, add a branch to the normal
3227 // destination.
3228 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3229 BranchInst *NewBr = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator());
3230 NewBr->setDebugLoc(Returns[0]->getDebugLoc());
3231 }
3232
3233 // If the return instruction returned a value, replace uses of the call with
3234 // uses of the returned value.
3235 if (!CB.use_empty()) {
3236 ReturnInst *R = Returns[0];
3237 if (&CB == R->getReturnValue())
3238 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3239 else
3240 CB.replaceAllUsesWith(V: R->getReturnValue());
3241 }
3242 // Since we are now done with the Call/Invoke, we can delete it.
3243 CB.eraseFromParent();
3244
3245 // Since we are now done with the return instruction, delete it also.
3246 Returns[0]->eraseFromParent();
3247
3248 if (MergeAttributes)
3249 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3250
3251 // We are now done with the inlining.
3252 return;
3253 }
3254
  // Otherwise, we have the normal case of more than one block to inline or
  // multiple return sites.
3257
3258 // We want to clone the entire callee function into the hole between the
3259 // "starter" and "ender" blocks. How we accomplish this depends on whether
3260 // this is an invoke instruction or a call instruction.
3261 BasicBlock *AfterCallBB;
3262 BranchInst *CreatedBranchToNormalDest = nullptr;
3263 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3264
3265 // Add an unconditional branch to make this look like the CallInst case...
3266 CreatedBranchToNormalDest = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator());
3267 // We intend to replace this DebugLoc with another later.
3268 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary());
3269
    // Split the basic block. This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
3273 AfterCallBB =
3274 OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(),
3275 BBName: CalledFunc->getName() + ".exit");
3276
3277 } else { // It's a call
3278 // If this is a call instruction, we need to split the basic block that
3279 // the call lives in.
3280 //
3281 AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(),
3282 BBName: CalledFunc->getName() + ".exit");
3283 }
3284
3285 if (IFI.CallerBFI) {
3286 // Copy original BB's block frequency to AfterCallBB
3287 IFI.CallerBFI->setBlockFreq(BB: AfterCallBB,
3288 Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB));
3289 }
3290
3291 // Change the branch that used to go to AfterCallBB to branch to the first
3292 // basic block of the inlined function.
3293 //
3294 Instruction *Br = OrigBB->getTerminator();
3295 assert(Br && Br->getOpcode() == Instruction::Br &&
3296 "splitBasicBlock broken!");
3297 Br->setOperand(i: 0, Val: &*FirstNewBlock);
3298
3299 // Now that the function is correct, make it a little bit nicer. In
3300 // particular, move the basic blocks inserted from the end of the function
3301 // into the space made by splitting the source basic block.
3302 Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock,
3303 FromEndIt: Caller->end());
3304
3305 // Handle all of the return instructions that we just cloned in, and eliminate
3306 // any users of the original call/invoke instruction.
3307 Type *RTy = CalledFunc->getReturnType();
3308
3309 PHINode *PHI = nullptr;
3310 if (Returns.size() > 1) {
3311 // The PHI node should go at the front of the new basic block to merge all
3312 // possible incoming values.
3313 if (!CB.use_empty()) {
3314 PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName());
3315 PHI->insertBefore(InsertPos: AfterCallBB->begin());
3316 // Anything that used the result of the function call should now use the
3317 // PHI node as their operand.
3318 CB.replaceAllUsesWith(V: PHI);
3319 }
3320
3321 // Loop over all of the return instructions adding entries to the PHI node
3322 // as appropriate.
3323 if (PHI) {
3324 for (ReturnInst *RI : Returns) {
3325 assert(RI->getReturnValue()->getType() == PHI->getType() &&
3326 "Ret value not consistent in function!");
3327 PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent());
3328 }
3329 }
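    // For illustration (hedged, hypothetical names): two cloned returns
    // 'ret i32 %a.i' and 'ret i32 %b.i' become branches to AfterCallBB, which
    // now begins with
    //   %r = phi i32 [ %a.i, %ret1.i ], [ %b.i, %ret2.i ]
    // and all former uses of the call read %r instead.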
3330
3331 // Add a branch to the merge points and remove return instructions.
3332 DebugLoc Loc;
3333 for (ReturnInst *RI : Returns) {
3334 BranchInst *BI = BranchInst::Create(IfTrue: AfterCallBB, InsertBefore: RI->getIterator());
3335 Loc = RI->getDebugLoc();
3336 BI->setDebugLoc(Loc);
3337 RI->eraseFromParent();
3338 }
3339 // We need to set the debug location to *somewhere* inside the
3340 // inlined function. The line number may be nonsensical, but the
3341 // instruction will at least be associated with the right
3342 // function.
3343 if (CreatedBranchToNormalDest)
3344 CreatedBranchToNormalDest->setDebugLoc(Loc);
3345 } else if (!Returns.empty()) {
3346 // Otherwise, if there is exactly one return value, just replace anything
3347 // using the return value of the call with the computed value.
3348 if (!CB.use_empty()) {
3349 if (&CB == Returns[0]->getReturnValue())
3350 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3351 else
3352 CB.replaceAllUsesWith(V: Returns[0]->getReturnValue());
3353 }
3354
3355 // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
3356 BasicBlock *ReturnBB = Returns[0]->getParent();
3357 ReturnBB->replaceAllUsesWith(V: AfterCallBB);
3358
3359 // Splice the code from the return block into the block that it will return
3360 // to, which contains the code that was after the call.
3361 AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB);
3362
3363 if (CreatedBranchToNormalDest)
3364 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
3365
    // Delete the return instruction and the now-empty ReturnBB.
3367 Returns[0]->eraseFromParent();
3368 ReturnBB->eraseFromParent();
3369 } else if (!CB.use_empty()) {
3370 // In this case there are no returns to use, so there is no clear source
3371 // location for the "return".
3372 // FIXME: It may be correct to use the scope end line of the function here,
3373 // since this likely means we are falling out of the function.
3374 if (CreatedBranchToNormalDest)
3375 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown());
3376 // No returns, but something is using the return value of the call. Just
3377 // nuke the result.
3378 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3379 }
3380
3381 // Since we are now done with the Call/Invoke, we can delete it.
3382 CB.eraseFromParent();
3383
3384 // If we inlined any musttail calls and the original return is now
3385 // unreachable, delete it. It can only contain a bitcast and ret.
3386 if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB))
3387 AfterCallBB->eraseFromParent();
3388
3389 // We should always be able to fold the entry block of the function into the
3390 // single predecessor of the block...
3391 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
3392 BasicBlock *CalleeEntry = cast<BranchInst>(Val: Br)->getSuccessor(i: 0);
3393
3394 // Splice the code entry block into calling block, right before the
3395 // unconditional branch.
3396 CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes
3397 OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry);
3398
3399 // Remove the unconditional branch.
3400 Br->eraseFromParent();
3401
3402 // Now we can remove the CalleeEntry block, which is now empty.
3403 CalleeEntry->eraseFromParent();
3404
3405 // If we inserted a phi node, check to see if it has a single value (e.g. all
3406 // the entries are the same or undef). If so, remove the PHI so it doesn't
3407 // block other optimizations.
3408 if (PHI) {
3409 AssumptionCache *AC =
3410 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
3411 auto &DL = Caller->getDataLayout();
3412 if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) {
3413 PHI->replaceAllUsesWith(V);
3414 PHI->eraseFromParent();
3415 }
3416 }
3417
3418 if (MergeAttributes)
3419 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3420}
3421
3422llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
3423 bool MergeAttributes,
3424 AAResults *CalleeAAR,
3425 bool InsertLifetime,
3426 Function *ForwardVarArgsTo,
3427 OptimizationRemarkEmitter *ORE) {
3428 llvm::InlineResult Result = CanInlineCallSite(CB, IFI);
3429 if (Result.isSuccess()) {
3430 InlineFunctionImpl(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
3431 ForwardVarArgsTo, ORE);
3432 }
3433
3434 return Result;
3435}
3436