1//===- InlineFunction.cpp - Code to perform function inlining -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inlining of a function into a call site, resolving
10// parameters and the return value as appropriate.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SetVector.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/CallGraph.h"
25#include "llvm/Analysis/CaptureTracking.h"
26#include "llvm/Analysis/CtxProfAnalysis.h"
27#include "llvm/Analysis/IndirectCallVisitor.h"
28#include "llvm/Analysis/InstructionSimplify.h"
29#include "llvm/Analysis/MemoryProfileInfo.h"
30#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/Analysis/VectorUtils.h"
35#include "llvm/IR/Argument.h"
36#include "llvm/IR/AttributeMask.h"
37#include "llvm/IR/Attributes.h"
38#include "llvm/IR/BasicBlock.h"
39#include "llvm/IR/CFG.h"
40#include "llvm/IR/Constant.h"
41#include "llvm/IR/ConstantRange.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/DebugInfoMetadata.h"
46#include "llvm/IR/DebugLoc.h"
47#include "llvm/IR/DerivedTypes.h"
48#include "llvm/IR/Dominators.h"
49#include "llvm/IR/EHPersonalities.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalVariable.h"
52#include "llvm/IR/IRBuilder.h"
53#include "llvm/IR/InlineAsm.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/IntrinsicInst.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/LLVMContext.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/Metadata.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/PatternMatch.h"
64#include "llvm/IR/ProfDataUtils.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/User.h"
67#include "llvm/IR/Value.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
72#include "llvm/Transforms/Utils/Cloning.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/ValueMapper.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <deque>
79#include <iterator>
80#include <limits>
81#include <optional>
82#include <string>
83#include <utility>
84#include <vector>
85
86#define DEBUG_TYPE "inline-function"
87
88using namespace llvm;
89using namespace llvm::memprof;
90using ProfileCount = Function::ProfileCount;
91
92static cl::opt<bool>
93EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(Val: true),
94 cl::Hidden,
95 cl::desc("Convert noalias attributes to metadata during inlining."));
96
97static cl::opt<bool>
98 UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
99 cl::init(Val: true),
100 cl::desc("Use the llvm.experimental.noalias.scope.decl "
101 "intrinsic during inlining."));
102
103// Disabled by default, because the added alignment assumptions may increase
104// compile-time and block optimizations. This option is not suitable for use
105// with frontends that emit comprehensive parameter alignment annotations.
106static cl::opt<bool>
107PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
108 cl::init(Val: false), cl::Hidden,
109 cl::desc("Convert align attributes to assumptions during inlining."));
110
111static cl::opt<unsigned> InlinerAttributeWindow(
112 "max-inst-checked-for-throw-during-inlining", cl::Hidden,
113 cl::desc("the maximum number of instructions analyzed for may throw during "
114 "attribute inference in inlined body"),
115 cl::init(Val: 4));
116
117namespace {
118
119 /// A class for recording information about inlining a landing pad.
120 class LandingPadInliningInfo {
121 /// Destination of the invoke's unwind.
122 BasicBlock *OuterResumeDest;
123
124 /// Destination for the callee's resume.
125 BasicBlock *InnerResumeDest = nullptr;
126
127 /// LandingPadInst associated with the invoke.
128 LandingPadInst *CallerLPad = nullptr;
129
130 /// PHI for EH values from landingpad insts.
131 PHINode *InnerEHValuesPHI = nullptr;
132
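    /// Incoming values of the unwind destination's PHI nodes along the
    /// original invoke edge, saved before that edge is removed.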
133 SmallVector<Value*, 8> UnwindDestPHIValues;
134
135 public:
136 LandingPadInliningInfo(InvokeInst *II)
137 : OuterResumeDest(II->getUnwindDest()) {
138 // If there are PHI nodes in the unwind destination block, we need to keep
139 // track of which values came into them from the invoke before removing
140 // the edge from this block.
141 BasicBlock *InvokeBB = II->getParent();
142 BasicBlock::iterator I = OuterResumeDest->begin();
143 for (; isa<PHINode>(Val: I); ++I) {
144 // Save the value to use for this edge.
145 PHINode *PHI = cast<PHINode>(Val&: I);
146 UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB));
147 }
148
149 CallerLPad = cast<LandingPadInst>(Val&: I);
150 }
151
152 /// The outer unwind destination is the target of
153 /// unwind edges introduced for calls within the inlined function.
154 BasicBlock *getOuterResumeDest() const {
155 return OuterResumeDest;
156 }
157
158 BasicBlock *getInnerResumeDest();
159
160 LandingPadInst *getLandingPadInst() const { return CallerLPad; }
161
162 /// Forward the 'resume' instruction to the caller's landing pad block.
163 /// When the landing pad block has only one predecessor, this is
164 /// a simple branch. When there is more than one predecessor, we need to
165 /// split the landing pad block after the landingpad instruction and jump
166 /// to there.
167 void forwardResume(ResumeInst *RI,
168 SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
169
170 /// Add incoming-PHI values to the unwind destination block for the given
171 /// basic block, using the values for the original invoke's source block.
172 void addIncomingPHIValuesFor(BasicBlock *BB) const {
173 addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest);
174 }
175
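    /// Append the saved invoke-edge values to dest's PHI nodes as incoming
    /// values from src.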
176 void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
177 BasicBlock::iterator I = dest->begin();
178 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
179 PHINode *phi = cast<PHINode>(Val&: I);
180 phi->addIncoming(V: UnwindDestPHIValues[i], BB: src);
181 }
182 }
183 };
184} // end anonymous namespace
185
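/// Return the convergence control "entry" intrinsic call in BB, if present,
/// scanning from the first non-PHI instruction; otherwise return nullptr.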
186static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) {
187 BasicBlock::iterator It = BB.getFirstNonPHIIt();
188 while (It != BB.end()) {
189 if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) {
190 if (IntrinsicCall->isEntry()) {
191 return IntrinsicCall;
192 }
193 }
194 It = std::next(x: It);
195 }
196 return nullptr;
197}
198
199/// Get or create a target for the branch from ResumeInsts.
200BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
201 if (InnerResumeDest) return InnerResumeDest;
202
203 // Split the landing pad.
204 BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
205 InnerResumeDest =
206 OuterResumeDest->splitBasicBlock(I: SplitPoint,
207 BBName: OuterResumeDest->getName() + ".body");
208
209 // The number of incoming edges we expect to the inner landing pad.
210 const unsigned PHICapacity = 2;
211
212 // Create corresponding new PHIs for all the PHIs in the outer landing pad.
213 BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
214 BasicBlock::iterator I = OuterResumeDest->begin();
215 for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
216 PHINode *OuterPHI = cast<PHINode>(Val&: I);
217 PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity,
218 NameStr: OuterPHI->getName() + ".lpad-body");
219 InnerPHI->insertBefore(InsertPos: InsertPoint);
220 OuterPHI->replaceAllUsesWith(V: InnerPHI);
221 InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest);
222 }
223
224 // Create a PHI for the exception values.
225 InnerEHValuesPHI =
226 PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body");
227 InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint);
228 CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI);
229 InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest);
230
231 // All done.
232 return InnerResumeDest;
233}
234
235/// Forward the 'resume' instruction to the caller's landing pad block.
236/// When the landing pad block has only one predecessor, this is a simple
237/// branch. When there is more than one predecessor, we need to split the
238/// landing pad block after the landingpad instruction and jump to there.
239void LandingPadInliningInfo::forwardResume(
240 ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
241 BasicBlock *Dest = getInnerResumeDest();
242 BasicBlock *Src = RI->getParent();
243
244 auto *BI = BranchInst::Create(IfTrue: Dest, InsertBefore: Src);
245 BI->setDebugLoc(RI->getDebugLoc());
246
247 // Update the PHIs in the destination. They were inserted in an order which
248 // makes this work.
249 addIncomingPHIValuesForInto(src: Src, dest: Dest);
250
251 InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src);
252 RI->eraseFromParent();
253}
254
255/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
256static Value *getParentPad(Value *EHPad) {
257 if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad))
258 return FPI->getParentPad();
259 return cast<CatchSwitchInst>(Val: EHPad)->getParentPad();
260}
261
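/// Memo map from EH pads to their resolved unwind destination token (nullptr
/// when no definitive destination has been found yet), shared across
/// getUnwindDestToken queries to avoid repeated searches.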
262using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
263
264/// Helper for getUnwindDestToken that does the descendant-ward part of
265/// the search.
266static Value *getUnwindDestTokenHelper(Instruction *EHPad,
267 UnwindDestMemoTy &MemoMap) {
268 SmallVector<Instruction *, 8> Worklist(1, EHPad);
269
270 while (!Worklist.empty()) {
271 Instruction *CurrentPad = Worklist.pop_back_val();
272 // We only put pads on the worklist that aren't in the MemoMap. When
273 // we find an unwind dest for a pad we may update its ancestors, but
274 // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
275 // so they should never get updated while queued on the worklist.
276 assert(!MemoMap.count(CurrentPad));
277 Value *UnwindDestToken = nullptr;
278 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) {
279 if (CatchSwitch->hasUnwindDest()) {
280 UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt();
281 } else {
282 // Catchswitch doesn't have a 'nounwind' variant, and one might be
283 // annotated as "unwinds to caller" when really it's nounwind (see
284 // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
285 // parent's unwind dest from this. We can check its catchpads'
286 // descendants, since they might include a cleanuppad with an
287 // "unwinds to caller" cleanupret, which can be trusted.
288 for (auto HI = CatchSwitch->handler_begin(),
289 HE = CatchSwitch->handler_end();
290 HI != HE && !UnwindDestToken; ++HI) {
291 BasicBlock *HandlerBlock = *HI;
292 auto *CatchPad =
293 cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt());
294 for (User *Child : CatchPad->users()) {
295 // Intentionally ignore invokes here -- since the catchswitch is
296 // marked "unwind to caller", it would be a verifier error if it
297 // contained an invoke which unwinds out of it, so any invoke we'd
298 // encounter must unwind to some child of the catch.
299 if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child))
300 continue;
301
302 Instruction *ChildPad = cast<Instruction>(Val: Child);
303 auto Memo = MemoMap.find(Val: ChildPad);
304 if (Memo == MemoMap.end()) {
305 // Haven't figured out this child pad yet; queue it.
306 Worklist.push_back(Elt: ChildPad);
307 continue;
308 }
309 // We've already checked this child, but might have found that
310 // it offers no proof either way.
311 Value *ChildUnwindDestToken = Memo->second;
312 if (!ChildUnwindDestToken)
313 continue;
314 // We already know the child's unwind dest, which can either
315 // be ConstantTokenNone to indicate unwind to caller, or can
316 // be another child of the catchpad. Only the former indicates
317 // the unwind dest of the catchswitch.
318 if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) {
319 UnwindDestToken = ChildUnwindDestToken;
320 break;
321 }
322 assert(getParentPad(ChildUnwindDestToken) == CatchPad);
323 }
324 }
325 }
326 } else {
327 auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad);
328 for (User *U : CleanupPad->users()) {
329 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) {
330 if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
331 UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt();
332 else
333 UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext());
334 break;
335 }
336 Value *ChildUnwindDestToken;
337 if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) {
338 ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt();
339 } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) {
340 Instruction *ChildPad = cast<Instruction>(Val: U);
341 auto Memo = MemoMap.find(Val: ChildPad);
342 if (Memo == MemoMap.end()) {
343 // Haven't resolved this child yet; queue it and keep searching.
344 Worklist.push_back(Elt: ChildPad);
345 continue;
346 }
347 // We've checked this child, but still need to ignore it if it
348 // had no proof either way.
349 ChildUnwindDestToken = Memo->second;
350 if (!ChildUnwindDestToken)
351 continue;
352 } else {
353 // Not a relevant user of the cleanuppad
354 continue;
355 }
356 // In a well-formed program, the child/invoke must either unwind to
357 // an(other) child of the cleanup, or exit the cleanup. In the
358 // first case, continue searching.
359 if (isa<Instruction>(Val: ChildUnwindDestToken) &&
360 getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad)
361 continue;
362 UnwindDestToken = ChildUnwindDestToken;
363 break;
364 }
365 }
366 // If we haven't found an unwind dest for CurrentPad, we may have queued its
367 // children, so move on to the next in the worklist.
368 if (!UnwindDestToken)
369 continue;
370
371 // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
372 // any ancestors of CurrentPad up to but not including UnwindDestToken's
373 // parent pad. Record this in the memo map, and check to see if the
374 // original EHPad being queried is one of the ones exited.
375 Value *UnwindParent;
376 if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken))
377 UnwindParent = getParentPad(EHPad: UnwindPad);
378 else
379 UnwindParent = nullptr;
380 bool ExitedOriginalPad = false;
381 for (Instruction *ExitedPad = CurrentPad;
382 ExitedPad && ExitedPad != UnwindParent;
383 ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) {
384 // Skip over catchpads since they just follow their catchswitches.
385 if (isa<CatchPadInst>(Val: ExitedPad))
386 continue;
387 MemoMap[ExitedPad] = UnwindDestToken;
388 ExitedOriginalPad |= (ExitedPad == EHPad);
389 }
390
391 if (ExitedOriginalPad)
392 return UnwindDestToken;
393
394 // Continue the search.
395 }
396
397 // No definitive information is contained within this funclet.
398 return nullptr;
399}
400
401/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
402/// return that pad instruction. If it unwinds to caller, return
403/// ConstantTokenNone. If it does not have a definitive unwind destination,
404/// return nullptr.
405///
406/// This routine gets invoked for calls in funclets in inlinees when inlining
407/// an invoke. Since many funclets don't have calls inside them, it's queried
408/// on-demand rather than building a map of pads to unwind dests up front.
409/// Determining a funclet's unwind dest may require recursively searching its
410/// descendants, and also ancestors and cousins if the descendants don't provide
411/// an answer. Since most funclets will have their unwind dest immediately
412/// available as the unwind dest of a catchswitch or cleanupret, this routine
413/// searches top-down from the given pad and then up. To avoid worst-case
414/// quadratic run-time given that approach, it uses a memo map to avoid
415/// re-processing funclet trees. The callers that rewrite the IR as they go
416/// take advantage of this, for correctness, by checking/forcing rewritten
417/// pads' entries to match the original callee view.
418static Value *getUnwindDestToken(Instruction *EHPad,
419 UnwindDestMemoTy &MemoMap) {
// Catchpads unwind to the same place as their catchswitch;
// redirect any queries on catchpads so the code below can
// deal with just catchswitches and cleanuppads.
423 if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad))
424 EHPad = CPI->getCatchSwitch();
425
426 // Check if we've already determined the unwind dest for this pad.
427 auto Memo = MemoMap.find(Val: EHPad);
428 if (Memo != MemoMap.end())
429 return Memo->second;
430
431 // Search EHPad and, if necessary, its descendants.
432 Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
433 assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
434 if (UnwindDestToken)
435 return UnwindDestToken;
436
437 // No information is available for this EHPad from itself or any of its
438 // descendants. An unwind all the way out to a pad in the caller would
439 // need also to agree with the unwind dest of the parent funclet, so
440 // search up the chain to try to find a funclet with information. Put
441 // null entries in the memo map to avoid re-processing as we go up.
442 MemoMap[EHPad] = nullptr;
443#ifndef NDEBUG
444 SmallPtrSet<Instruction *, 4> TempMemos;
445 TempMemos.insert(EHPad);
446#endif
447 Instruction *LastUselessPad = EHPad;
448 Value *AncestorToken;
449 for (AncestorToken = getParentPad(EHPad);
450 auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken);
451 AncestorToken = getParentPad(EHPad: AncestorToken)) {
452 // Skip over catchpads since they just follow their catchswitches.
453 if (isa<CatchPadInst>(Val: AncestorPad))
454 continue;
455 // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
456 // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
457 // call to getUnwindDestToken, that would mean that AncestorPad had no
458 // information in itself, its descendants, or its ancestors. If that
459 // were the case, then we should also have recorded the lack of information
460 // for the descendant that we're coming from. So assert that we don't
461 // find a null entry in the MemoMap for AncestorPad.
462 assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
463 auto AncestorMemo = MemoMap.find(Val: AncestorPad);
464 if (AncestorMemo == MemoMap.end()) {
465 UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap);
466 } else {
467 UnwindDestToken = AncestorMemo->second;
468 }
469 if (UnwindDestToken)
470 break;
471 LastUselessPad = AncestorPad;
472 MemoMap[LastUselessPad] = nullptr;
473#ifndef NDEBUG
474 TempMemos.insert(LastUselessPad);
475#endif
476 }
477
478 // We know that getUnwindDestTokenHelper was called on LastUselessPad and
479 // returned nullptr (and likewise for EHPad and any of its ancestors up to
480 // LastUselessPad), so LastUselessPad has no information from below. Since
481 // getUnwindDestTokenHelper must investigate all downward paths through
482 // no-information nodes to prove that a node has no information like this,
483 // and since any time it finds information it records it in the MemoMap for
484 // not just the immediately-containing funclet but also any ancestors also
485 // exited, it must be the case that, walking downward from LastUselessPad,
486 // visiting just those nodes which have not been mapped to an unwind dest
487 // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
488 // they are just used to keep getUnwindDestTokenHelper from repeating work),
489 // any node visited must have been exhaustively searched with no information
490 // for it found.
491 SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
492 while (!Worklist.empty()) {
493 Instruction *UselessPad = Worklist.pop_back_val();
494 auto Memo = MemoMap.find(Val: UselessPad);
495 if (Memo != MemoMap.end() && Memo->second) {
496 // Here the name 'UselessPad' is a bit of a misnomer, because we've found
497 // that it is a funclet that does have information about unwinding to
498 // a particular destination; its parent was a useless pad.
499 // Since its parent has no information, the unwind edge must not escape
500 // the parent, and must target a sibling of this pad. This local unwind
501 // gives us no information about EHPad. Leave it and the subtree rooted
502 // at it alone.
503 assert(getParentPad(Memo->second) == getParentPad(UselessPad));
504 continue;
505 }
// We know we don't have information for UselessPad. If it has an entry in
// the MemoMap (mapping it to nullptr), it must be one of the TempMemos
// added on this invocation of getUnwindDestToken; if a previous invocation
// recorded nullptr, it would have had to prove that the ancestors of
// UselessPad, which include LastUselessPad, had no information, and that
// in turn would have required proving that the descendants of
// LastUselessPad, which include EHPad, have no information about
// LastUselessPad, which would imply that EHPad was mapped to nullptr in
// the MemoMap on that invocation, which isn't the case if we got here.
assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
// Assert as we enumerate users that 'UselessPad' doesn't have any unwind
// information that we'd be contradicting by making a map entry for it
// (which is something that getUnwindDestTokenHelper must have proved for
// us to get here). Just assert on its direct users here; the checks in
// this downward walk at its descendants will verify that they don't have
// any unwind edges that exit 'UselessPad' either (i.e. they either have no
// unwind edges or unwind to a sibling).
523 MemoMap[UselessPad] = UnwindDestToken;
524 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) {
525 assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
526 for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
527 auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt();
528 for (User *U : CatchPad->users()) {
529 assert((!isa<InvokeInst>(U) ||
530 (getParentPad(&*cast<InvokeInst>(U)
531 ->getUnwindDest()
532 ->getFirstNonPHIIt()) == CatchPad)) &&
533 "Expected useless pad");
534 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
535 Worklist.push_back(Elt: cast<Instruction>(Val: U));
536 }
537 }
538 } else {
539 assert(isa<CleanupPadInst>(UselessPad));
540 for (User *U : UselessPad->users()) {
541 assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
542 assert(
543 (!isa<InvokeInst>(U) ||
544 (getParentPad(
545 &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) ==
546 UselessPad)) &&
547 "Expected useless pad");
548 if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U))
549 Worklist.push_back(Elt: cast<Instruction>(Val: U));
550 }
551 }
552 }
553
554 return UnwindDestToken;
555}
556
/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any such calls, and if so,
/// it rewrites them to be invokes that jump to UnwindEdge, returning the block
/// so the caller can add the corresponding entries to the unwind destination's
/// PHI nodes.
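///
/// For illustration (schematic IR, names are placeholders): a throwing call
///   %r = call i32 @may_throw()
/// inside the inlined body becomes
///   %r = invoke i32 @may_throw() to label %split.cont unwind label %unwind.edge
/// where %unwind.edge is the caller's unwind destination and %split.cont is
/// the remainder of the original block after the split.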
562static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
563 BasicBlock *BB, BasicBlock *UnwindEdge,
564 UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
565 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
566 // We only need to check for function calls: inlined invoke
567 // instructions require no special handling.
568 CallInst *CI = dyn_cast<CallInst>(Val: &I);
569
570 if (!CI || CI->doesNotThrow())
571 continue;
572
573 // We do not need to (and in fact, cannot) convert possibly throwing calls
574 // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
575 // invokes. The caller's "segment" of the deoptimization continuation
576 // attached to the newly inlined @llvm.experimental_deoptimize
577 // (resp. @llvm.experimental.guard) call should contain the exception
578 // handling logic, if any.
579 if (auto *F = CI->getCalledFunction())
580 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
581 F->getIntrinsicID() == Intrinsic::experimental_guard)
582 continue;
583
584 if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) {
// This call is nested inside a funclet. If that funclet has an unwind
// destination within the inlinee, then unwinding out of this call would
// be UB. Rewriting this call to an invoke which targets the inlined
// invoke's unwind dest would give the call's parent funclet multiple
// unwind destinations, which is something that subsequent EH table
// generation can't handle and that the verifier rejects. So when we
// see such a call, leave it as a call.
592 auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]);
593 Value *UnwindDestToken =
594 getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap);
595 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
596 continue;
597#ifndef NDEBUG
598 Instruction *MemoKey;
599 if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
600 MemoKey = CatchPad->getCatchSwitch();
601 else
602 MemoKey = FuncletPad;
603 assert(FuncletUnwindMap->count(MemoKey) &&
604 (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
605 "must get memoized to avoid confusing later searches");
606#endif // NDEBUG
607 }
608
609 changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
610 return BB;
611 }
612 return nullptr;
613}
614
/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
621static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
622 ClonedCodeInfo &InlinedCodeInfo) {
623 BasicBlock *InvokeDest = II->getUnwindDest();
624
625 Function *Caller = FirstNewBlock->getParent();
626
627 // The inlined code is currently at the end of the function, scan from the
628 // start of the inlined code to its end, checking for stuff we need to
629 // rewrite.
630 LandingPadInliningInfo Invoke(II);
631
632 // Get all of the inlined landing pad instructions.
633 SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
634 for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
635 I != E; ++I)
636 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator()))
637 InlinedLPads.insert(Ptr: II->getLandingPadInst());
638
639 // Append the clauses from the outer landing pad instruction into the inlined
640 // landing pad instructions.
641 LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
642 for (LandingPadInst *InlinedLPad : InlinedLPads) {
643 unsigned OuterNum = OuterLPad->getNumClauses();
644 InlinedLPad->reserveClauses(Size: OuterNum);
645 for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
646 InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx));
647 if (OuterLPad->isCleanup())
648 InlinedLPad->setCleanup(true);
649 }
650
651 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
652 BB != E; ++BB) {
653 if (InlinedCodeInfo.ContainsCalls)
654 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
655 BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest()))
656 // Update any PHI nodes in the exceptional block to indicate that there
657 // is now a new entry in them.
658 Invoke.addIncomingPHIValuesFor(BB: NewBB);
659
660 // Forward any resumes that are remaining here.
661 if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator()))
662 Invoke.forwardResume(RI, InlinedLPads);
663 }
664
665 // Now that everything is happy, we have one final detail. The PHI nodes in
666 // the exception destination block still have entries due to the original
667 // invoke instruction. Eliminate these entries (which might even delete the
668 // PHI node) now.
669 InvokeDest->removePredecessor(Pred: II->getParent());
670}
671
/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
678static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
679 ClonedCodeInfo &InlinedCodeInfo) {
680 BasicBlock *UnwindDest = II->getUnwindDest();
681 Function *Caller = FirstNewBlock->getParent();
682
683 assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!");
684
685 // If there are PHI nodes in the unwind destination block, we need to keep
686 // track of which values came into them from the invoke before removing the
687 // edge from this block.
688 SmallVector<Value *, 8> UnwindDestPHIValues;
689 BasicBlock *InvokeBB = II->getParent();
690 for (PHINode &PHI : UnwindDest->phis()) {
691 // Save the value to use for this edge.
692 UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB));
693 }
694
695 // Add incoming-PHI values to the unwind destination block for the given basic
696 // block, using the values for the original invoke's source block.
697 auto UpdatePHINodes = [&](BasicBlock *Src) {
698 BasicBlock::iterator I = UnwindDest->begin();
699 for (Value *V : UnwindDestPHIValues) {
700 PHINode *PHI = cast<PHINode>(Val&: I);
701 PHI->addIncoming(V, BB: Src);
702 ++I;
703 }
704 };
705
706 // This connects all the instructions which 'unwind to caller' to the invoke
707 // destination.
708 UnwindDestMemoTy FuncletUnwindMap;
709 for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
710 BB != E; ++BB) {
711 if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) {
712 if (CRI->unwindsToCaller()) {
713 auto *CleanupPad = CRI->getCleanupPad();
714 CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator());
715 CRI->eraseFromParent();
716 UpdatePHINodes(&*BB);
717 // Finding a cleanupret with an unwind destination would confuse
718 // subsequent calls to getUnwindDestToken, so map the cleanuppad
719 // to short-circuit any such calls and recognize this as an "unwind
720 // to caller" cleanup.
721 assert(!FuncletUnwindMap.count(CleanupPad) ||
722 isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
723 FuncletUnwindMap[CleanupPad] =
724 ConstantTokenNone::get(Context&: Caller->getContext());
725 }
726 }
727
728 BasicBlock::iterator I = BB->getFirstNonPHIIt();
729 if (!I->isEHPad())
730 continue;
731
732 Instruction *Replacement = nullptr;
733 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
734 if (CatchSwitch->unwindsToCaller()) {
735 Value *UnwindDestToken;
736 if (auto *ParentPad =
737 dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) {
// This catchswitch is nested inside another funclet. If that
// funclet has an unwind destination within the inlinee, then
// unwinding out of this catchswitch would be UB. Rewriting this
// catchswitch to unwind to the inlined invoke's unwind dest would
// give the parent funclet multiple unwind destinations, which is
// something that subsequent EH table generation can't handle and
// that the verifier rejects. So when we see such a catchswitch, leave
// it as "unwind to caller".
746 UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap);
747 if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken))
748 continue;
749 } else {
750 // This catchswitch has no parent to inherit constraints from, and
751 // none of its descendants can have an unwind edge that exits it and
752 // targets another funclet in the inlinee. It may or may not have a
753 // descendant that definitively has an unwind to caller. In either
754 // case, we'll have to assume that any unwinds out of it may need to
755 // be routed to the caller, so treat it as though it has a definitive
756 // unwind to caller.
757 UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext());
758 }
759 auto *NewCatchSwitch = CatchSwitchInst::Create(
760 ParentPad: CatchSwitch->getParentPad(), UnwindDest,
761 NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(),
762 InsertBefore: CatchSwitch->getIterator());
763 for (BasicBlock *PadBB : CatchSwitch->handlers())
764 NewCatchSwitch->addHandler(Dest: PadBB);
765 // Propagate info for the old catchswitch over to the new one in
766 // the unwind map. This also serves to short-circuit any subsequent
767 // checks for the unwind dest of this catchswitch, which would get
768 // confused if they found the outer handler in the callee.
769 FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
770 Replacement = NewCatchSwitch;
771 }
772 } else if (!isa<FuncletPadInst>(Val: I)) {
773 llvm_unreachable("unexpected EHPad!");
774 }
775
776 if (Replacement) {
777 Replacement->takeName(V: &*I);
778 I->replaceAllUsesWith(V: Replacement);
779 I->eraseFromParent();
780 UpdatePHINodes(&*BB);
781 }
782 }
783
784 if (InlinedCodeInfo.ContainsCalls)
785 for (Function::iterator BB = FirstNewBlock->getIterator(),
786 E = Caller->end();
787 BB != E; ++BB)
788 if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
789 BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap))
790 // Update any PHI nodes in the exceptional block to indicate that there
791 // is now a new entry in them.
792 UpdatePHINodes(NewBB);
793
794 // Now that everything is happy, we have one final detail. The PHI nodes in
795 // the exception destination block still have entries due to the original
796 // invoke instruction. Eliminate these entries (which might even delete the
797 // PHI node) now.
798 UnwindDest->removePredecessor(Pred: InvokeBB);
799}
800
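// Return true if the stack ids in MIBStackContext and CallsiteStackContext
// agree along their common prefix (i.e. up to the length of the shorter of
// the two stack contexts).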
801static bool haveCommonPrefix(MDNode *MIBStackContext,
802 MDNode *CallsiteStackContext) {
803 assert(MIBStackContext->getNumOperands() > 0 &&
804 CallsiteStackContext->getNumOperands() > 0);
805 // Because of the context trimming performed during matching, the callsite
806 // context could have more stack ids than the MIB. We match up to the end of
807 // the shortest stack context.
808 for (auto MIBStackIter = MIBStackContext->op_begin(),
809 CallsiteStackIter = CallsiteStackContext->op_begin();
810 MIBStackIter != MIBStackContext->op_end() &&
811 CallsiteStackIter != CallsiteStackContext->op_end();
812 MIBStackIter++, CallsiteStackIter++) {
813 auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter);
814 auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter);
815 assert(Val1 && Val2);
816 if (Val1->getZExtValue() != Val2->getZExtValue())
817 return false;
818 }
819 return true;
820}
821
822static void removeMemProfMetadata(CallBase *Call) {
823 Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr);
824}
825
826static void removeCallsiteMetadata(CallBase *Call) {
827 Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr);
828}
829
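// Rebuild the !memprof metadata on CI from the given list of MIB nodes. If
// the CallStackTrie instead attaches a single allocation type function
// attribute, the !callsite metadata is removed as well, since there is no
// remaining context to match against.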
830static void updateMemprofMetadata(CallBase *CI,
831 const std::vector<Metadata *> &MIBList,
832 OptimizationRemarkEmitter *ORE) {
833 assert(!MIBList.empty());
834 // Remove existing memprof, which will either be replaced or may not be needed
835 // if we are able to use a single allocation type function attribute.
836 removeMemProfMetadata(Call: CI);
837 CallStackTrie CallStack(ORE);
838 for (Metadata *MIB : MIBList)
839 CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB));
840 bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
841 assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
842 if (!MemprofMDAttached)
843 // If we used a function attribute remove the callsite metadata as well.
844 removeCallsiteMetadata(Call: CI);
845}
846
847// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
848// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
849// the call that was inlined.
850static void propagateMemProfHelper(const CallBase *OrigCall,
851 CallBase *ClonedCall,
852 MDNode *InlinedCallsiteMD,
853 OptimizationRemarkEmitter *ORE) {
854 MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite);
855 MDNode *ClonedCallsiteMD = nullptr;
856 // Check if the call originally had callsite metadata, and update it for the
857 // new call in the inlined body.
858 if (OrigCallsiteMD) {
859 // The cloned call's context is now the concatenation of the original call's
860 // callsite metadata and the callsite metadata on the call where it was
861 // inlined.
862 ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD);
863 ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD);
864 }
865
866 // Update any memprof metadata on the cloned call.
867 MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof);
868 if (!OrigMemProfMD)
869 return;
870 // We currently expect that allocations with memprof metadata also have
871 // callsite metadata for the allocation's part of the context.
872 assert(OrigCallsiteMD);
873
874 // New call's MIB list.
875 std::vector<Metadata *> NewMIBList;
876
877 // For each MIB metadata, check if its call stack context starts with the
878 // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
879 // the inlined body. If not, it stays on the out-of-line original call.
880 for (auto &MIBOp : OrigMemProfMD->operands()) {
881 MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp);
882 // Stack is first operand of MIB.
883 MDNode *StackMD = getMIBStackNode(MIB);
884 assert(StackMD);
885 // See if the new cloned callsite context matches this profiled context.
886 if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD))
887 // Add it to the cloned call's MIB list.
888 NewMIBList.push_back(x: MIB);
889 }
890 if (NewMIBList.empty()) {
891 removeMemProfMetadata(Call: ClonedCall);
892 removeCallsiteMetadata(Call: ClonedCall);
893 return;
894 }
895 if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
896 updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE);
897}
898
899// Update memprof related metadata (!memprof and !callsite) based on the
900// inlining of Callee into the callsite at CB. The updates include merging the
901// inlined callee's callsite metadata with that of the inlined call,
902// and moving the subset of any memprof contexts to the inlined callee
903// allocations if they match the new inlined call stack.
904static void
905propagateMemProfMetadata(Function *Callee, CallBase &CB,
906 bool ContainsMemProfMetadata,
907 const ValueMap<const Value *, WeakTrackingVH> &VMap,
908 OptimizationRemarkEmitter *ORE) {
909 MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite);
910 // Only need to update if the inlined callsite had callsite metadata, or if
911 // there was any memprof metadata inlined.
912 if (!CallsiteMD && !ContainsMemProfMetadata)
913 return;
914
915 // Propagate metadata onto the cloned calls in the inlined callee.
916 for (const auto &Entry : VMap) {
917 // See if this is a call that has been inlined and remapped, and not
918 // simplified away in the process.
919 auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first);
920 auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second);
921 if (!OrigCall || !ClonedCall)
922 continue;
923 // If the inlined callsite did not have any callsite metadata, then it isn't
924 // involved in any profiled call contexts, and we can remove any memprof
925 // metadata on the cloned call.
926 if (!CallsiteMD) {
927 removeMemProfMetadata(Call: ClonedCall);
928 removeCallsiteMetadata(Call: ClonedCall);
929 continue;
930 }
931 propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE);
932 }
933}
934
935/// When inlining a call site that has !llvm.mem.parallel_loop_access,
936/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
937/// be propagated to all memory-accessing cloned instructions.
938static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
939 Function::iterator FEnd) {
940 MDNode *MemParallelLoopAccess =
941 CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
942 MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group);
943 MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope);
944 MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias);
945 if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
946 return;
947
948 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
949 for (Instruction &I : BB) {
950 // This metadata is only relevant for instructions that access memory.
951 if (!I.mayReadOrWriteMemory())
952 continue;
953
954 if (MemParallelLoopAccess) {
// TODO: This probably should not overwrite MemParallelLoopAccess.
956 MemParallelLoopAccess = MDNode::concatenate(
957 A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access),
958 B: MemParallelLoopAccess);
959 I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access,
960 Node: MemParallelLoopAccess);
961 }
962
963 if (AccessGroup)
964 I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups(
965 AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup));
966
967 if (AliasScope)
968 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate(
969 A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope));
970
971 if (NoAlias)
972 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate(
973 A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias));
974 }
975 }
976}
977
978/// Bundle operands of the inlined function must be added to inlined call sites.
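/// In particular, calls cloned from the callee into a call site that carries
/// a "funclet" operand bundle must themselves be re-created with that bundle
/// so funclet membership stays explicit; nounwind intrinsics that never lower
/// to real function calls are left alone.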
979static void PropagateOperandBundles(Function::iterator InlinedBB,
980 Instruction *CallSiteEHPad) {
981 for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) {
982 CallBase *I = dyn_cast<CallBase>(Val: &II);
983 if (!I)
984 continue;
985 // Skip call sites which already have a "funclet" bundle.
986 if (I->getOperandBundle(ID: LLVMContext::OB_funclet))
987 continue;
988 // Skip call sites which are nounwind intrinsics (as long as they don't
989 // lower into regular function calls in the course of IR transformations).
990 auto *CalledFn =
991 dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts());
992 if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
993 !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID()))
994 continue;
995
996 SmallVector<OperandBundleDef, 1> OpBundles;
997 I->getOperandBundlesAsDefs(Defs&: OpBundles);
998 OpBundles.emplace_back(Args: "funclet", Args&: CallSiteEHPad);
999
1000 Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator());
1001 NewInst->takeName(V: I);
1002 I->replaceAllUsesWith(V: NewInst);
1003 I->eraseFromParent();
1004 }
1005}
1006
1007namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of
/// the metadata, putting the two versions in separate scope domains.
1012class ScopedAliasMetadataDeepCloner {
1013 using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
1014 SetVector<const MDNode *> MD;
1015 MetadataMap MDMap;
1016 void addRecursiveMetadataUses();
1017
1018public:
1019 ScopedAliasMetadataDeepCloner(const Function *F);
1020
1021 /// Create a new clone of the scoped alias metadata, which will be used by
1022 /// subsequent remap() calls.
1023 void clone();
1024
1025 /// Remap instructions in the given range from the original to the cloned
1026 /// metadata.
1027 void remap(Function::iterator FStart, Function::iterator FEnd);
1028};
1029} // namespace
1030
1031ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
1032 const Function *F) {
1033 for (const BasicBlock &BB : *F) {
1034 for (const Instruction &I : BB) {
1035 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1036 MD.insert(X: M);
1037 if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1038 MD.insert(X: M);
1039
1040 // We also need to clone the metadata in noalias intrinsics.
1041 if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1042 MD.insert(X: Decl->getScopeList());
1043 }
1044 }
1045 addRecursiveMetadataUses();
1046}
1047
1048void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
1049 SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
1050 while (!Queue.empty()) {
1051 const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val());
1052 for (const Metadata *Op : M->operands())
1053 if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op))
1054 if (MD.insert(X: OpMD))
1055 Queue.push_back(Elt: OpMD);
1056 }
1057}
1058
1059void ScopedAliasMetadataDeepCloner::clone() {
1060 assert(MDMap.empty() && "clone() already called ?");
1061
1062 SmallVector<TempMDTuple, 16> DummyNodes;
1063 for (const MDNode *I : MD) {
1064 DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {}));
1065 MDMap[I].reset(MD: DummyNodes.back().get());
1066 }
1067
1068 // Create new metadata nodes to replace the dummy nodes, replacing old
1069 // metadata references with either a dummy node or an already-created new
1070 // node.
1071 SmallVector<Metadata *, 4> NewOps;
1072 for (const MDNode *I : MD) {
1073 for (const Metadata *Op : I->operands()) {
1074 if (const MDNode *M = dyn_cast<MDNode>(Val: Op))
1075 NewOps.push_back(Elt: MDMap[M]);
1076 else
1077 NewOps.push_back(Elt: const_cast<Metadata *>(Op));
1078 }
1079
1080 MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps);
1081 MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]);
1082 assert(TempM->isTemporary() && "Expected temporary node");
1083
1084 TempM->replaceAllUsesWith(MD: NewM);
1085 NewOps.clear();
1086 }
1087}
1088
1089void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
1090 Function::iterator FEnd) {
1091 if (MDMap.empty())
1092 return; // Nothing to do.
1093
1094 for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) {
1095 for (Instruction &I : BB) {
1096 // TODO: The null checks for the MDMap.lookup() results should no longer
1097 // be necessary.
1098 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope))
1099 if (MDNode *MNew = MDMap.lookup(Val: M))
1100 I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew);
1101
1102 if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias))
1103 if (MDNode *MNew = MDMap.lookup(Val: M))
1104 I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew);
1105
1106 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
1107 if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList()))
1108 Decl->setScopeList(MNew);
1109 }
1110 }
1111}
1112
1113/// If the inlined function has noalias arguments,
1114/// then add new alias scopes for each noalias argument, tag the mapped noalias
1115/// parameters with noalias metadata specifying the new scope, and tag all
1116/// non-derived loads, stores and memory intrinsics with the new alias scopes.
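///
/// Schematically (illustrative IR only): for a callee such as
///   define void @f(ptr noalias %p, ptr %q)
/// each access in the inlined body based on %p is tagged with !alias.scope
/// naming a fresh scope created for %p, while accesses provably not based on
/// %p are tagged !noalias with that same scope, preserving the noalias
/// guarantee after the attribute itself is gone.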
1117static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1118 const DataLayout &DL, AAResults *CalleeAAR,
1119 ClonedCodeInfo &InlinedFunctionInfo) {
1120 if (!EnableNoAliasConversion)
1121 return;
1122
1123 const Function *CalledFunc = CB.getCalledFunction();
1124 SmallVector<const Argument *, 4> NoAliasArgs;
1125
1126 for (const Argument &Arg : CalledFunc->args())
1127 if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty())
1128 NoAliasArgs.push_back(Elt: &Arg);
1129
1130 if (NoAliasArgs.empty())
1131 return;
1132
1133 // To do a good job, if a noalias variable is captured, we need to know if
1134 // the capture point dominates the particular use we're considering.
1135 DominatorTree DT;
1136 DT.recalculate(Func&: const_cast<Function&>(*CalledFunc));
1137
1138 // noalias indicates that pointer values based on the argument do not alias
1139 // pointer values which are not based on it. So we add a new "scope" for each
1140 // noalias function argument. Accesses using pointers based on that argument
1141 // become part of that alias scope, accesses using pointers not based on that
1142 // argument are tagged as noalias with that scope.
1143
1144 DenseMap<const Argument *, MDNode *> NewScopes;
1145 MDBuilder MDB(CalledFunc->getContext());
1146
1147 // Create a new scope domain for this function.
1148 MDNode *NewDomain =
1149 MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName());
1150 for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
1151 const Argument *A = NoAliasArgs[i];
1152
1153 std::string Name = std::string(CalledFunc->getName());
1154 if (A->hasName()) {
1155 Name += ": %";
1156 Name += A->getName();
1157 } else {
1158 Name += ": argument ";
1159 Name += utostr(X: i);
1160 }
1161
1162 // Note: We always create a new anonymous root here. This is true regardless
1163 // of the linkage of the callee because the aliasing "scope" is not just a
1164 // property of the callee, but also all control dependencies in the caller.
1165 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
1166 NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope));
1167
1168 if (UseNoAliasIntrinsic) {
1169 // Introduce a llvm.experimental.noalias.scope.decl for the noalias
1170 // argument.
1171 MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope);
1172 auto *NoAliasDecl =
1173 IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList);
1174 // Ignore the result for now. The result will be used when the
1175 // llvm.noalias intrinsic is introduced.
1176 (void)NoAliasDecl;
1177 }
1178 }
1179
1180 // Iterate over all new instructions in the map; for all memory-access
1181 // instructions, add the alias scope metadata.
1182 for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1183 VMI != VMIE; ++VMI) {
1184 if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) {
1185 if (!VMI->second)
1186 continue;
1187
1188 Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second);
1189 if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI))
1190 continue;
1191
1192 bool IsArgMemOnlyCall = false, IsFuncCall = false;
1193 SmallVector<const Value *, 2> PtrArgs;
1194
1195 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
1196 PtrArgs.push_back(Elt: LI->getPointerOperand());
1197 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1198 PtrArgs.push_back(Elt: SI->getPointerOperand());
1199 else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I))
1200 PtrArgs.push_back(Elt: VAAI->getPointerOperand());
1201 else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I))
1202 PtrArgs.push_back(Elt: CXI->getPointerOperand());
1203 else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I))
1204 PtrArgs.push_back(Elt: RMWI->getPointerOperand());
1205 else if (const auto *Call = dyn_cast<CallBase>(Val: I)) {
1206 // If we know that the call does not access memory, then we'll still
1207 // know that about the inlined clone of this call site, and we don't
1208 // need to add metadata.
1209 if (Call->doesNotAccessMemory())
1210 continue;
1211
1212 IsFuncCall = true;
1213 if (CalleeAAR) {
1214 MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
1215
1216 // We'll retain this knowledge without additional metadata.
1217 if (ME.onlyAccessesInaccessibleMem())
1218 continue;
1219
1220 if (ME.onlyAccessesArgPointees())
1221 IsArgMemOnlyCall = true;
1222 }
1223
1224 for (Value *Arg : Call->args()) {
1225 // Only care about pointer arguments. If a noalias argument is
1226 // accessed through a non-pointer argument, it must be captured
1227 // first (e.g. via ptrtoint), and we protect against captures below.
1228 if (!Arg->getType()->isPointerTy())
1229 continue;
1230
1231 PtrArgs.push_back(Elt: Arg);
1232 }
1233 }
1234
// If we found no pointers, then this instruction is not suitable for
// pairing with an instruction to receive aliasing metadata.
// However, if this is a call, then we might just alias with none of the
// noalias arguments.
1239 if (PtrArgs.empty() && !IsFuncCall)
1240 continue;
1241
1242 // It is possible that there is only one underlying object, but you
1243 // need to go through several PHIs to see it, and thus could be
1244 // repeated in the Objects list.
1245 SmallPtrSet<const Value *, 4> ObjSet;
1246 SmallVector<Metadata *, 4> Scopes, NoAliases;
1247
1248 for (const Value *V : PtrArgs) {
1249 SmallVector<const Value *, 4> Objects;
1250 getUnderlyingObjects(V, Objects, /* LI = */ nullptr);
1251
1252 ObjSet.insert_range(R&: Objects);
1253 }
1254
1255 // Figure out if we're derived from anything that is not a noalias
1256 // argument.
1257 bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
1258 UsesUnknownObject = false;
1259 for (const Value *V : ObjSet) {
1260 // Is this value a constant that cannot be derived from any pointer
1261 // value (we need to exclude constant expressions, for example, that
1262 // are formed from arithmetic on global symbols).
1263 bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) ||
1264 isa<ConstantPointerNull>(Val: V) ||
1265 isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V);
1266 if (IsNonPtrConst)
1267 continue;
1268
1269 // If this is anything other than a noalias argument, then we cannot
1270 // completely describe the aliasing properties using alias.scope
1271 // metadata (and, thus, won't add any).
1272 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
1273 if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias))
1274 UsesAliasingPtr = true;
1275 } else {
1276 UsesAliasingPtr = true;
1277 }
1278
1279 if (isEscapeSource(V)) {
1280 // An escape source can only alias with a noalias argument if it has
1281 // been captured beforehand.
1282 RequiresNoCaptureBefore = true;
1283 } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) {
1284 // If this is neither an escape source, nor some identified object
1285 // (which cannot directly alias a noalias argument), nor some other
1286 // argument (which, by definition, also cannot alias a noalias
1287 // argument), conservatively do not make any assumptions.
1288 UsesUnknownObject = true;
1289 }
1290 }
1291
1292 // Nothing we can do if the used underlying object cannot be reliably
1293 // determined.
1294 if (UsesUnknownObject)
1295 continue;
1296
1297 // A function call can always get captured noalias pointers (via other
1298 // parameters, globals, etc.).
1299 if (IsFuncCall && !IsArgMemOnlyCall)
1300 RequiresNoCaptureBefore = true;
1301
// First, we want to figure out all of the sets with which we definitely
// don't alias. Iterate over all noalias sets, and add those for which:
// 1. The noalias argument is not in the set of objects from which we
// definitely derive.
// 2. The noalias argument has not yet been captured.
// An arbitrary function that might load pointers could see captured
// noalias arguments via other noalias arguments or globals, and so we
// must always check for prior capture.
1310 for (const Argument *A : NoAliasArgs) {
1311 if (ObjSet.contains(Ptr: A))
1312 continue; // May be based on a noalias argument.
1313
1314 // It might be tempting to skip the PointerMayBeCapturedBefore check if
1315 // A->hasNoCaptureAttr() is true, but this is incorrect because
1316 // nocapture only guarantees that no copies outlive the function, not
1317 // that the value cannot be locally captured.
1318 if (!RequiresNoCaptureBefore ||
1319 !capturesAnything(CC: PointerMayBeCapturedBefore(
1320 V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false,
1321 Mask: CaptureComponents::Provenance)))
1322 NoAliases.push_back(Elt: NewScopes[A]);
1323 }
1324
1325 if (!NoAliases.empty())
1326 NI->setMetadata(KindID: LLVMContext::MD_noalias,
1327 Node: MDNode::concatenate(
1328 A: NI->getMetadata(KindID: LLVMContext::MD_noalias),
1329 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases)));
1330
1331 // Next, we want to figure out all of the sets to which we might belong.
1332 // We might belong to a set if the noalias argument is in the set of
1333 // underlying objects. If there is some non-noalias argument in our list
1334 // of underlying objects, then we cannot add a scope because the fact
1335 // that some access does not alias with any set of our noalias arguments
1336 // cannot itself guarantee that it does not alias with this access
1337 // (because there is some pointer of unknown origin involved and the
1338 // other access might also depend on this pointer). We also cannot add
1339 // scopes to arbitrary functions unless we know they don't access any
1340 // non-parameter pointer-values.
1341 bool CanAddScopes = !UsesAliasingPtr;
1342 if (CanAddScopes && IsFuncCall)
1343 CanAddScopes = IsArgMemOnlyCall;
1344
1345 if (CanAddScopes)
1346 for (const Argument *A : NoAliasArgs) {
1347 if (ObjSet.count(Ptr: A))
1348 Scopes.push_back(Elt: NewScopes[A]);
1349 }
1350
1351 if (!Scopes.empty())
1352 NI->setMetadata(
1353 KindID: LLVMContext::MD_alias_scope,
1354 Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope),
1355 B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes)));
1356 }
1357 }
1358}
1359
1360static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1361 ReturnInst *End) {
1362
1363 assert(Begin->getParent() == End->getParent() &&
1364 "Expected to be in same basic block!");
1365 auto BeginIt = Begin->getIterator();
1366 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1367 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1368 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1369}
1370
1371// Add attributes from CB params and Fn attributes that can always be propagated
1372// to the corresponding argument / inner callbases.
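// As a hedged example (hypothetical IR, illustrative only): if the call being
// inlined looks like
//   call void @callee(ptr readonly align 8 %p)
// and @callee forwards %p to an inner call, the readonly and align(8) facts
// from the call site can be attached to the corresponding operand of the
// cloned inner call, subject to the checks performed below.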
1373static void AddParamAndFnBasicAttributes(const CallBase &CB,
1374 ValueToValueMapTy &VMap,
1375 ClonedCodeInfo &InlinedFunctionInfo) {
1376 auto *CalledFunction = CB.getCalledFunction();
1377 auto &Context = CalledFunction->getContext();
1378
1379 // Collect valid attributes for all params.
1380 SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
1381 bool HasAttrToPropagate = false;
1382
1383 // Attributes we can only propagate if the exact parameter is forwarded.
1384 // We can propagate both poison generating and UB generating attributes
1385 // without any extra checks. The only attribute that is tricky to propagate
1386 // is `noundef` (skipped for now) as that can create new UB where previous
1387 // behavior was just using a poison value.
1388 static const Attribute::AttrKind ExactAttrsToPropagate[] = {
1389 Attribute::Dereferenceable, Attribute::DereferenceableOrNull,
1390 Attribute::NonNull, Attribute::NoFPClass,
1391 Attribute::Alignment, Attribute::Range};
1392
1393 for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1394 ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1395 ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()});
1396 // Access attributes can be propagated to any param with the same underlying
1397 // object as the argument.
1398 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone))
1399 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone);
1400 if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly))
1401 ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly);
1402
1403 for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
1404 Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK);
1405 if (Attr.isValid())
1406 ValidExactParamAttrs.back().addAttribute(A: Attr);
1407 }
1408
1409 HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes();
1410 HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes();
1411 }
1412
1413 // Won't be able to propagate anything.
1414 if (!HasAttrToPropagate)
1415 return;
1416
1417 for (BasicBlock &BB : *CalledFunction) {
1418 for (Instruction &Ins : BB) {
1419 const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins);
1420 if (!InnerCB)
1421 continue;
1422 auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB));
1423 if (!NewInnerCB)
1424 continue;
1425      // The InnerCB might have been simplified during the inlining
1426      // process, which can make propagation incorrect.
1427 if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB))
1428 continue;
1429
1430 AttributeList AL = NewInnerCB->getAttributes();
1431 for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1432 // It's unsound or requires special handling to propagate
1433 // attributes to byval arguments. Even if CalledFunction
1434 // doesn't e.g. write to the argument (readonly), the call to
1435 // NewInnerCB may write to its by-value copy.
1436 if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal))
1437 continue;
1438
1439 // Don't bother propagating attrs to constants.
1440 if (match(V: NewInnerCB->getArgOperand(i: I),
1441 P: llvm::PatternMatch::m_ImmConstant()))
1442 continue;
1443
1444 // Check if the underlying value for the parameter is an argument.
1445 const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I));
1446 unsigned ArgNo;
1447 if (Arg) {
1448 ArgNo = Arg->getArgNo();
1449          // For dereferenceable, dereferenceable_or_null, align, etc...
1450          // we don't want to propagate if the existing param has the same
1451          // attribute with "better" constraints. So drop the attribute from
1452          // NewAB if the guarantee carried by the existing param is stronger
1453          // than what we can propagate.
1454 AttrBuilder NewAB{
1455 Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])};
1456 if (AL.getParamDereferenceableBytes(Index: I) >
1457 NewAB.getDereferenceableBytes())
1458 NewAB.removeAttribute(Val: Attribute::Dereferenceable);
1459 if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) >
1460 NewAB.getDereferenceableOrNullBytes())
1461 NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1462 if (AL.getParamAlignment(ArgNo: I).valueOrOne() >
1463 NewAB.getAlignment().valueOrOne())
1464 NewAB.removeAttribute(Val: Attribute::Alignment);
1465 if (auto ExistingRange = AL.getParamRange(ArgNo: I)) {
1466 if (auto NewRange = NewAB.getRange()) {
1467 ConstantRange CombinedRange =
1468 ExistingRange->intersectWith(CR: *NewRange);
1469 NewAB.removeAttribute(Val: Attribute::Range);
1470 NewAB.addRangeAttr(CR: CombinedRange);
1471 }
1472 }
1473
1474 if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I))
1475 NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass());
1476
1477 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB);
1478 } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) {
1479 // Check if the underlying value for the parameter is an argument.
1480 const Value *UnderlyingV =
1481 getUnderlyingObject(V: InnerCB->getArgOperand(i: I));
1482 Arg = dyn_cast<Argument>(Val: UnderlyingV);
1483 if (!Arg)
1484 continue;
1485 ArgNo = Arg->getArgNo();
1486 } else {
1487 continue;
1488 }
1489
1490 // If so, propagate its access attributes.
1491 AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]);
1492
1493 // We can have conflicting attributes from the inner callsite and
1494 // to-be-inlined callsite. In that case, choose the most
1495 // restrictive.
1496
1497 // readonly + writeonly means we can never deref so make readnone.
1498 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) &&
1499 AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly))
1500 AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone);
1501
1502        // If we have readnone, we need to clear readonly/writeonly.
1503 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) {
1504 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly);
1505 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly);
1506 }
1507
1508        // Writable cannot exist in conjunction with readonly/readnone.
1509 if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) ||
1510 AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone))
1511 AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable);
1512 }
1513 NewInnerCB->setAttributes(AL);
1514 }
1515 }
1516}
1517
1518// Only allow these whitelisted attributes to be propagated back to the
1519// callee. This is because other attributes may only be valid on the call
1520// itself, i.e. attributes such as signext and zeroext.
1521
1522// Attributes that are always okay to propagate, as violating them is
1523// immediate UB.
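// A hedged illustration (hypothetical IR): for a call site such as
//   %r = call dereferenceable(16) noundef ptr @g()
// both dereferenceable(16) and noundef may be copied onto the call producing
// the return value in the inlined body, since violating either is immediate
// UB at the original call site.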
1524static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1525 AttrBuilder Valid(CB.getContext());
1526 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1527 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1528 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1529 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1530 if (CB.hasRetAttr(Kind: Attribute::NoAlias))
1531 Valid.addAttribute(Val: Attribute::NoAlias);
1532 if (CB.hasRetAttr(Kind: Attribute::NoUndef))
1533 Valid.addAttribute(Val: Attribute::NoUndef);
1534 return Valid;
1535}
1536
1537// Attributes that need additional checks as propagating them may change
1538// behavior or cause new UB.
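// A hedged illustration (hypothetical IR): for a call site such as
//   %r = call nonnull align 8 ptr @h()
// nonnull and align(8) only make the result poison when violated, so they are
// propagated only after the extra checks in AddReturnAttributes below.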
1539static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1540 AttrBuilder Valid(CB.getContext());
1541 if (CB.hasRetAttr(Kind: Attribute::NonNull))
1542 Valid.addAttribute(Val: Attribute::NonNull);
1543 if (CB.hasRetAttr(Kind: Attribute::Alignment))
1544 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1545 if (std::optional<ConstantRange> Range = CB.getRange())
1546 Valid.addRangeAttr(CR: *Range);
1547 return Valid;
1548}
1549
1550static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1551 ClonedCodeInfo &InlinedFunctionInfo) {
1552 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1553 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1554 if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1555 return;
1556 auto *CalledFunction = CB.getCalledFunction();
1557 auto &Context = CalledFunction->getContext();
1558
1559 for (auto &BB : *CalledFunction) {
1560 auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
1561 if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
1562 continue;
1563 auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
1564 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1565 // add the attributes on the cloned RetVal. Simplification during inlining
1566 // could have transformed the cloned instruction.
1567 auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal));
1568 if (!NewRetVal)
1569 continue;
1570
1571    // The RetVal might have been simplified during the inlining
1572    // process, which can make propagation incorrect.
1573 if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal))
1574 continue;
1575 // Backward propagation of attributes to the returned value may be incorrect
1576 // if it is control flow dependent.
1577 // Consider:
1578 // @callee {
1579 // %rv = call @foo()
1580 // %rv2 = call @bar()
1581 // if (%rv2 != null)
1582 // return %rv2
1583 // if (%rv == null)
1584 // exit()
1585 // return %rv
1586 // }
1587 // caller() {
1588 // %val = call nonnull @callee()
1589 // }
1590    // Here we cannot add the nonnull attribute on either foo or bar. So, we
1591    // require that RetVal and RI are in the same basic block and that there
1592    // are no throwing/exiting instructions between them.
1593 if (RI->getParent() != RetVal->getParent() ||
1594 MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
1595 continue;
1596 // Add to the existing attributes of NewRetVal, i.e. the cloned call
1597 // instruction.
1598 // NB! When we have the same attribute already existing on NewRetVal, but
1599 // with a differing value, the AttributeList's merge API honours the already
1600 // existing attribute value (i.e. attributes such as dereferenceable,
1601 // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1602 AttributeList AL = NewRetVal->getAttributes();
1603 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1604 ValidUB.removeAttribute(Val: Attribute::Dereferenceable);
1605 if (ValidUB.getDereferenceableOrNullBytes() <
1606 AL.getRetDereferenceableOrNullBytes())
1607 ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull);
1608 AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
1609    // Attributes that may generate poison returns are a bit tricky. If we
1610    // propagate them, other uses of the callsite might have their behavior
1611    // change or cause UB (if they have noundef) because of the new potential
1612    // poison.
1613 // Take the following three cases:
1614 //
1615 // 1)
1616 // define nonnull ptr @foo() {
1617 // %p = call ptr @bar()
1618 // call void @use(ptr %p) willreturn nounwind
1619 // ret ptr %p
1620 // }
1621 //
1622 // 2)
1623 // define noundef nonnull ptr @foo() {
1624 // %p = call ptr @bar()
1625 // call void @use(ptr %p) willreturn nounwind
1626 // ret ptr %p
1627 // }
1628 //
1629 // 3)
1630 // define nonnull ptr @foo() {
1631 // %p = call noundef ptr @bar()
1632 // ret ptr %p
1633 // }
1634 //
1635 // In case 1, we can't propagate nonnull because poison value in @use may
1636 // change behavior or trigger UB.
1637 // In case 2, we don't need to be concerned about propagating nonnull, as
1638 // any new poison at @use will trigger UB anyways.
1639 // In case 3, we can never propagate nonnull because it may create UB due to
1640 // the noundef on @bar.
1641 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1642 ValidPG.removeAttribute(Val: Attribute::Alignment);
1643 if (ValidPG.hasAttributes()) {
1644 Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range);
1645 if (CBRange.isValid()) {
1646 Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range);
1647 if (NewRange.isValid()) {
1648 ValidPG.addRangeAttr(
1649 CR: CBRange.getRange().intersectWith(CR: NewRange.getRange()));
1650 }
1651 }
1652 // Three checks.
1653 // If the callsite has `noundef`, then a poison due to violating the
1654 // return attribute will create UB anyways so we can always propagate.
1655 // Otherwise, if the return value (callee to be inlined) has `noundef`, we
1656 // can't propagate as a new poison return will cause UB.
1657 // Finally, check if the return value has no uses whose behavior may
1658 // change/may cause UB if we potentially return poison. At the moment this
1659 // is implemented overly conservatively with a single-use check.
1660 // TODO: Update the single-use check to iterate through uses and only bail
1661 // if we have a potentially dangerous use.
1662
1663 if (CB.hasRetAttr(Kind: Attribute::NoUndef) ||
1664 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef)))
1665 NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
1666 }
1667 NewRetVal->setAttributes(NewAL);
1668 }
1669}
1670
1671/// If the inlined function has non-byval align arguments, then
1672/// add @llvm.assume-based alignment assumptions to preserve this information.
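/// A hedged sketch of the emitted form (hypothetical IR): for a callee
/// parameter declared as `ptr align 16 %p`, the inliner may insert
///   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]
/// at the call site when the required alignment is not already provable.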
1673static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1674 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1675 return;
1676
1677 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1678 auto &DL = CB.getDataLayout();
1679
1680 // To avoid inserting redundant assumptions, we should check for assumptions
1681 // already in the caller. To do this, we might need a DT of the caller.
1682 DominatorTree DT;
1683 bool DTCalculated = false;
1684
1685 Function *CalledFunc = CB.getCalledFunction();
1686 for (Argument &Arg : CalledFunc->args()) {
1687 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1688 Arg.use_empty())
1689 continue;
1690 MaybeAlign Alignment = Arg.getParamAlign();
1691 if (!Alignment)
1692 continue;
1693
1694 if (!DTCalculated) {
1695 DT.recalculate(Func&: *CB.getCaller());
1696 DTCalculated = true;
1697 }
1698 // If we can already prove the asserted alignment in the context of the
1699 // caller, then don't bother inserting the assumption.
1700 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1701 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1702 continue;
1703
1704 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1705 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1706 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1707 }
1708}
1709
1710static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1711 MaybeAlign SrcAlign, Module *M,
1712 BasicBlock *InsertBlock,
1713 InlineFunctionInfo &IFI,
1714 Function *CalledFunc) {
1715 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1716
1717 Value *Size =
1718 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1719
1720 Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout());
1721
1722 // Generate a memcpy with the correct alignments.
1723 CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
1724
1725 // The verifier requires that all calls of debug-info-bearing functions
1726 // from debug-info-bearing functions have a debug location (for inlining
1727 // purposes). Assign a dummy location to satisfy the constraint.
1728 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1729 if (DISubprogram *SP = CalledFunc->getSubprogram())
1730 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1731}
1732
1733/// When inlining a call site that has a byval argument,
1734/// we have to make the implicit memcpy explicit by adding it.
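/// A hedged sketch (hypothetical IR): for a call site argument such as
///   call void @callee(ptr byval(%struct.S) align 8 %p)
/// inlining introduces an alloca of %struct.S in the caller's entry block and
/// a memcpy from %p into it, unless the callee only reads memory and the
/// alignment requirement is already satisfied.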
1735static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1736 Instruction *TheCall,
1737 const Function *CalledFunc,
1738 InlineFunctionInfo &IFI,
1739 MaybeAlign ByValAlignment) {
1740 Function *Caller = TheCall->getFunction();
1741 const DataLayout &DL = Caller->getDataLayout();
1742
1743 // If the called function is readonly, then it could not mutate the caller's
1744 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1745 // temporary.
1746 if (CalledFunc->onlyReadsMemory()) {
1747    // If the byval argument has a specified alignment that is greater than
1748    // the known alignment of the passed-in pointer, then we either have to
1749    // raise the input pointer's alignment or give up on this transformation.
1750 if (ByValAlignment.valueOrOne() == 1)
1751 return Arg;
1752
1753 AssumptionCache *AC =
1754 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1755
1756 // If the pointer is already known to be sufficiently aligned, or if we can
1757 // round it up to a larger alignment, then we don't need a temporary.
1758 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1759 *ByValAlignment)
1760 return Arg;
1761
1762 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1763 // for code quality, but rarely happens and is required for correctness.
1764 }
1765
1766  // Create the alloca, using the preferred alignment for the byval type.
1767 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1768
1769 // If the byval had an alignment specified, we *must* use at least that
1770 // alignment, as it is required by the byval argument (and uses of the
1771 // pointer inside the callee).
1772 if (ByValAlignment)
1773 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1774
1775 AllocaInst *NewAlloca =
1776 new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1777 nullptr, Alignment, Arg->getName());
1778 NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated());
1779 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1780 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1781
1782 // Uses of the argument in the function should use our new alloca
1783 // instead.
1784 return NewAlloca;
1785}
1786
1787// Check whether this Value is used by a lifetime intrinsic.
1788static bool isUsedByLifetimeMarker(Value *V) {
1789 for (User *U : V->users())
1790 if (isa<LifetimeIntrinsic>(Val: U))
1791 return true;
1792 return false;
1793}
1794
1795// Check whether the given alloca already has
1796// lifetime.start or lifetime.end intrinsics.
1797static bool hasLifetimeMarkers(AllocaInst *AI) {
1798 Type *Ty = AI->getType();
1799 Type *Int8PtrTy =
1800 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1801 if (Ty == Int8PtrTy)
1802 return isUsedByLifetimeMarker(V: AI);
1803
1804 // Do a scan to find all the casts to i8*.
1805 for (User *U : AI->users()) {
1806 if (U->getType() != Int8PtrTy) continue;
1807 if (U->stripPointerCasts() != AI) continue;
1808 if (isUsedByLifetimeMarker(V: U))
1809 return true;
1810 }
1811 return false;
1812}
1813
1814/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1815/// block. Allocas used in inalloca calls and allocas of dynamic array size
1816/// cannot be static.
1817static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1818 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1819}
1820
1821/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1822/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1823static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1824 LLVMContext &Ctx,
1825 DenseMap<const MDNode *, MDNode *> &IANodes) {
1826 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1827 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1828 Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(),
1829 AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank());
1830}
1831
1832/// Update inlined instructions' line numbers to encode the location where
1833/// these instructions are inlined.
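/// As a hedged example: an instruction originally at line 10 of the callee,
/// inlined at a call on line 42 of the caller, keeps its own scope and line
/// but gains an inlined-at location pointing at line 42 (built via
/// inlineDebugLoc above).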
1834static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1835 Instruction *TheCall, bool CalleeHasDebugInfo) {
1836 if (!TheCall->getDebugLoc())
1837 return;
1838
1839 // Don't propagate the source location atom from the call to inlined nodebug
1840 // instructions, and avoid putting it in the InlinedAt field of inlined
1841 // not-nodebug instructions. FIXME: Possibly worth transferring/generating
1842 // an atom for the returned value, otherwise we miss stepping on inlined
1843 // nodebug functions (which is different to existing behaviour).
1844 DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom();
1845
1846 auto &Ctx = Fn->getContext();
1847 DILocation *InlinedAtNode = TheCallDL;
1848
1849 // Create a unique call site, not to be confused with any other call from the
1850 // same location.
1851 InlinedAtNode = DILocation::getDistinct(
1852 Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
1853 Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());
1854
1855  // Cache the inlined-at nodes as they're built so they are reused; without
1856  // this, every instruction's inlined-at chain would become distinct from the
1857  // others.
1858 DenseMap<const MDNode *, MDNode *> IANodes;
1859
1860 // Check if we are not generating inline line tables and want to use
1861 // the call site location instead.
1862 bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1863
1864 // Helper-util for updating the metadata attached to an instruction.
1865 auto UpdateInst = [&](Instruction &I) {
1866 // Loop metadata needs to be updated so that the start and end locs
1867 // reference inlined-at locations.
1868 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1869 &IANodes](Metadata *MD) -> Metadata * {
1870 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
1871 return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
1872 return MD;
1873 };
1874 updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);
1875
1876 if (!NoInlineLineTables)
1877 if (DebugLoc DL = I.getDebugLoc()) {
1878 DebugLoc IDL =
1879 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
1880 I.setDebugLoc(IDL);
1881 return;
1882 }
1883
1884 if (CalleeHasDebugInfo && !NoInlineLineTables)
1885 return;
1886
1887 // If the inlined instruction has no line number, or if inline info
1888 // is not being generated, make it look as if it originates from the call
1889 // location. This is important for ((__always_inline, __nodebug__))
1890 // functions which must use caller location for all instructions in their
1891 // function body.
1892
1893 // Don't update static allocas, as they may get moved later.
1894 if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
1895 if (allocaWouldBeStaticInEntry(AI))
1896 return;
1897
1898 // Do not force a debug loc for pseudo probes, since they do not need to
1899 // be debuggable, and also they are expected to have a zero/null dwarf
1900 // discriminator at this point which could be violated otherwise.
1901 if (isa<PseudoProbeInst>(Val: I))
1902 return;
1903
1904 I.setDebugLoc(TheCallDL);
1905 };
1906
1907 // Helper-util for updating debug-info records attached to instructions.
1908 auto UpdateDVR = [&](DbgRecord *DVR) {
1909 assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1910 if (NoInlineLineTables) {
1911 DVR->setDebugLoc(TheCallDL);
1912 return;
1913 }
1914 DebugLoc DL = DVR->getDebugLoc();
1915 DebugLoc IDL =
1916 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
1917 Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
1918 DVR->setDebugLoc(IDL);
1919 };
1920
1921 // Iterate over all instructions, updating metadata and debug-info records.
1922 for (; FI != Fn->end(); ++FI) {
1923 for (Instruction &I : *FI) {
1924 UpdateInst(I);
1925 for (DbgRecord &DVR : I.getDbgRecordRange()) {
1926 UpdateDVR(&DVR);
1927 }
1928 }
1929
1930 // Remove debug info records if we're not keeping inline info.
1931 if (NoInlineLineTables) {
1932 BasicBlock::iterator BI = FI->begin();
1933 while (BI != FI->end()) {
1934 BI->dropDbgRecords();
1935 ++BI;
1936 }
1937 }
1938 }
1939}
1940
1941#undef DEBUG_TYPE
1942#define DEBUG_TYPE "assignment-tracking"
1943/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1944static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1945 const CallBase &CB) {
1946 at::StorageToVarsMap EscapedLocals;
1947 SmallPtrSet<const Value *, 4> SeenBases;
1948
1949 LLVM_DEBUG(
1950 errs() << "# Finding caller local variables escaped by callee\n");
1951 for (const Value *Arg : CB.args()) {
1952 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1953 if (!Arg->getType()->isPointerTy()) {
1954 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1955 continue;
1956 }
1957
1958 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
1959 if (!I) {
1960 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1961 continue;
1962 }
1963
1964 // Walk back to the base storage.
1965 assert(Arg->getType()->isPtrOrPtrVectorTy());
1966 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
1967 const AllocaInst *Base = dyn_cast<AllocaInst>(
1968 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
1969 if (!Base) {
1970 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1971 continue;
1972 }
1973
1974 assert(Base);
1975 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1976 // We only need to process each base address once - skip any duplicates.
1977 if (!SeenBases.insert(Ptr: Base).second)
1978 continue;
1979
1980 // Find all local variables associated with the backing storage.
1981 auto CollectAssignsForStorage = [&](auto *DbgAssign) {
1982 // Skip variables from inlined functions - they are not local variables.
1983 if (DbgAssign->getDebugLoc().getInlinedAt())
1984 return;
1985 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1986 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
1987 };
1988 for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1989 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1990 }
1991 return EscapedLocals;
1992}
1993
1994static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1995 const CallBase &CB) {
1996 LLVM_DEBUG(errs() << "trackInlinedStores into "
1997 << Start->getParent()->getName() << " from "
1998 << CB.getCalledFunction()->getName() << "\n");
1999 const DataLayout &DL = CB.getDataLayout();
2000 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL);
2001}
2002
2003/// Update inlined instructions' DIAssignID metadata. We need to do this;
2004/// otherwise a function inlined more than once into the same function
2005/// will cause DIAssignID to be shared by many instructions.
2006static void fixupAssignments(Function::iterator Start, Function::iterator End) {
2007 DenseMap<DIAssignID *, DIAssignID *> Map;
2008 // Loop over all the inlined instructions. If we find a DIAssignID
2009 // attachment or use, replace it with a new version.
2010 for (auto BBI = Start; BBI != End; ++BBI) {
2011 for (Instruction &I : *BBI)
2012 at::remapAssignID(Map, I);
2013 }
2014}
2015#undef DEBUG_TYPE
2016#define DEBUG_TYPE "inline-function"
2017
2018/// Update the block frequencies of the caller after a callee has been inlined.
2019///
2020/// Each block cloned into the caller has its block frequency scaled by the
2021/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
2022/// callee's entry block gets the same frequency as the callsite block and the
2023/// relative frequencies of all cloned blocks remain the same after cloning.
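/// A hedged worked example (assumed numbers): if the callsite block has
/// frequency 200 and the callee's entry block had frequency 100, every cloned
/// block is scaled by 200/100 = 2, so a callee block with frequency 50 ends
/// up with frequency 100 in the caller.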
2024static void updateCallerBFI(BasicBlock *CallSiteBlock,
2025 const ValueToValueMapTy &VMap,
2026 BlockFrequencyInfo *CallerBFI,
2027 BlockFrequencyInfo *CalleeBFI,
2028 const BasicBlock &CalleeEntryBlock) {
2029 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
2030 for (auto Entry : VMap) {
2031 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
2032 continue;
2033 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
2034 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
2035 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
2036 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
2037 // Multiple blocks in the callee might get mapped to one cloned block in
2038 // the caller since we prune the callee as we clone it. When that happens,
2039 // we want to use the maximum among the original blocks' frequencies.
2040 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
2041 if (NewFreq > Freq)
2042 Freq = NewFreq;
2043 }
2044 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
2045 }
2046 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
2047 CallerBFI->setBlockFreqAndScale(
2048 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
2049}
2050
2051/// Update the branch metadata for cloned call instructions.
2052static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
2053 const ProfileCount &CalleeEntryCount,
2054 const CallBase &TheCall, ProfileSummaryInfo *PSI,
2055 BlockFrequencyInfo *CallerBFI) {
2056 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
2057 return;
2058 auto CallSiteCount =
2059 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
2060 int64_t CallCount =
2061 std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount());
2062 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
2063}
2064
2065void llvm::updateProfileCallee(
2066 Function *Callee, int64_t EntryDelta,
2067 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
2068 auto CalleeCount = Callee->getEntryCount();
2069 if (!CalleeCount)
2070 return;
2071
2072 const uint64_t PriorEntryCount = CalleeCount->getCount();
2073
2074  // Since CallSiteCount is an estimate, it could exceed the original callee
2075  // count; guard against underflow by clamping the new entry count to 0.
2076 const uint64_t NewEntryCount =
2077 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
2078 ? 0
2079 : PriorEntryCount + EntryDelta;
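  // A hedged worked example (assumed numbers): with PriorEntryCount = 100 and
  // EntryDelta = -30 (30 calls now happen inline), NewEntryCount becomes 70.
  // The cloned call sites below are then scaled by 30/100, and the call sites
  // remaining in the out-of-line callee by 70/100.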
2080
2081 auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
2082 const uint64_t PriorEntryCount) {
2083 Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
2084 if (VPtr)
2085 scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount);
2086 };
2087
2088  // Are we updating during inlining (i.e. do we have a VMap)?
2089 if (VMap) {
2090 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
2091 for (auto Entry : *VMap) {
2092 if (isa<CallInst>(Val: Entry.first))
2093 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) {
2094 CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2095 updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
2096 }
2097
2098 if (isa<InvokeInst>(Val: Entry.first))
2099 if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) {
2100 II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
2101 updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2102 }
2103 }
2104 }
2105
2106 if (EntryDelta) {
2107 Callee->setEntryCount(Count: NewEntryCount);
2108
2109 for (BasicBlock &BB : *Callee)
2110 // No need to update the callsite if it is pruned during inlining.
2111 if (!VMap || VMap->count(Val: &BB))
2112 for (Instruction &I : BB) {
2113 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2114 CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2115 updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2116 }
2117 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) {
2118 II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
2119 updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2120 }
2121 }
2122 }
2123}
2124
2125/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2126/// result is implicitly consumed by a call to retainRV or claimRV immediately
2127/// after the call. This function inlines the retainRV/claimRV calls.
2128///
2129/// There are three cases to consider:
2130///
2131/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2132/// object in the callee return block, the autoreleaseRV call and the
2133/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2134/// is a claimRV call, a call to objc_release is emitted.
2135///
2136/// 2. If there is a call in the callee return block that doesn't have operand
2137/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2138/// is transferred to the call in the callee.
2139///
2140/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2141/// a retainRV call.
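/// A hedged sketch of case 1 (hypothetical IR): if the callee's return block
/// contains an otherwise-unused
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)
/// for the returned value %v, and the caller's call carries a retainRV
/// "clang.arc.attachedcall" bundle, the two cancel out and the autoreleaseRV
/// call is erased; with claimRV, a call to objc_release is emitted instead.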
2142static void
2143inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2144 const SmallVectorImpl<ReturnInst *> &Returns) {
2145 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2146 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2147 IsUnsafeClaimRV = !IsRetainRV;
2148
2149 for (auto *RI : Returns) {
2150 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
2151 bool InsertRetainCall = IsRetainRV;
2152 IRBuilder<> Builder(RI->getContext());
2153
2154 // Walk backwards through the basic block looking for either a matching
2155 // autoreleaseRV call or an unannotated call.
2156 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
2157 y: RI->getParent()->rend());
2158 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
2159 // Ignore casts.
2160 if (isa<CastInst>(Val: I))
2161 continue;
2162
2163 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
2164 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2165 !II->use_empty() ||
2166 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
2167 break;
2168
2169        // If we've found a matching autoreleaseRV call:
2170 // - If claimRV is attached to the call, insert a call to objc_release
2171 // and erase the autoreleaseRV call.
2172 // - If retainRV is attached to the call, just erase the autoreleaseRV
2173 // call.
2174 if (IsUnsafeClaimRV) {
2175 Builder.SetInsertPoint(II);
2176 Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd);
2177 }
2178 II->eraseFromParent();
2179 InsertRetainCall = false;
2180 break;
2181 }
2182
2183 auto *CI = dyn_cast<CallInst>(Val: &I);
2184
2185 if (!CI)
2186 break;
2187
2188 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
2189 objcarc::hasAttachedCallOpBundle(CB: CI))
2190 break;
2191
2192 // If we've found an unannotated call that defines RetOpnd, add a
2193 // "clang.arc.attachedcall" operand bundle.
2194 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2195 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2196 auto *NewCall = CallBase::addOperandBundle(
2197 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2198 NewCall->copyMetadata(SrcInst: *CI);
2199 CI->replaceAllUsesWith(V: NewCall);
2200 CI->eraseFromParent();
2201 InsertRetainCall = false;
2202 break;
2203 }
2204
2205 if (InsertRetainCall) {
2206 // The retainRV is attached to the call and we've failed to find a
2207 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2208 // to objc_retain.
2209 Builder.SetInsertPoint(RI);
2210 Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd);
2211 }
2212 }
2213}
2214
2215// In contextual profiling, when an inline succeeds, we want to remap the
2216// indices of the callee into the index space of the caller. We can't just leave
2217// them as-is because the same callee may appear in other places in this caller
2218// (other callsites), and its (callee's) counters and sub-contextual profile
2219// tree would be potentially different.
2220// Not all BBs of the callee may survive the opportunistic DCE InlineFunction
2221// does (same goes for callsites in the callee).
2222// We will return a pair of vectors, one for basic block IDs and one for
2223// callsites. For such a vector V, V[Idx] will be -1 if the callee
2224// instrumentation with index Idx did not survive inlining, and a new value
2225// otherwise.
2226// This function will update the caller's instrumentation intrinsics
2227// accordingly, mapping indices as described above. We also replace the "name"
2228// operand because we use it to distinguish between "own" instrumentation and
2229// "from callee" instrumentation when performing the traversal of the CFG of the
2230// caller. We traverse depth-first from the callsite's BB and up to the point we
2231// hit BBs owned by the caller.
2232// The return values will then be used to update the contextual
2233// profile. Note: we only update the "name" and "index" operands in the
2234// instrumentation intrinsics; we leave the hash and total number of indices
2235// as-is, since it's not worth updating those.
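// A hedged example (assumed indices): if the callee's counter 3 survives
// inlining, it might be remapped to the caller's next free index, say 7, so
// the returned counter vector would have V[3] == 7, while counters whose
// instrumentation did not survive keep V[Idx] == -1.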
2236static std::pair<std::vector<int64_t>, std::vector<int64_t>>
2237remapIndices(Function &Caller, BasicBlock *StartBB,
2238 PGOContextualProfile &CtxProf, uint32_t CalleeCounters,
2239 uint32_t CalleeCallsites) {
2240 // We'll allocate a new ID to imported callsite counters and callsites. We're
2241 // using -1 to indicate a counter we delete. Most likely the entry ID, for
2242 // example, will be deleted - we don't want 2 IDs in the same BB, and the
2243 // entry would have been cloned in the callsite's old BB.
2244 std::vector<int64_t> CalleeCounterMap;
2245 std::vector<int64_t> CalleeCallsiteMap;
2246 CalleeCounterMap.resize(new_size: CalleeCounters, x: -1);
2247 CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1);
2248
2249 auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
2250 if (Ins.getNameValue() == &Caller)
2251 return false;
2252 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2253 if (CalleeCounterMap[OldID] == -1)
2254 CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller);
2255 const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);
2256
2257 Ins.setNameValue(&Caller);
2258 Ins.setIndex(NewID);
2259 return true;
2260 };
2261
2262 auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool {
2263 if (Ins.getNameValue() == &Caller)
2264 return false;
2265 const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2266 if (CalleeCallsiteMap[OldID] == -1)
2267 CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller);
2268 const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);
2269
2270 Ins.setNameValue(&Caller);
2271 Ins.setIndex(NewID);
2272 return true;
2273 };
2274
2275 std::deque<BasicBlock *> Worklist;
2276 DenseSet<const BasicBlock *> Seen;
2277 // We will traverse the BBs starting from the callsite BB. The callsite BB
2278 // will have at least a BB ID - maybe its own, and in any case the one coming
2279 // from the cloned function's entry BB. The other BBs we'll start seeing from
2280 // there on may or may not have BB IDs. BBs with IDs belonging to our caller
2281 // are definitely not coming from the imported function and form a boundary
2282 // past which we don't need to traverse anymore. BBs may have no
2283 // instrumentation (because we originally inserted instrumentation as per
2284 // MST), in which case we'll traverse past them. An invariant we'll keep is
2285 // that a BB will have at most 1 BB ID. For example, in the callsite BB, we
2286 // will delete the callee BB's instrumentation. This doesn't result in
2287 // information loss: the entry BB of the callee will have the same count as
2288 // the callsite's BB. At the end of this traversal, all the callee's
2289 // instrumentation would be mapped into the caller's instrumentation index
2290 // space. Some of the callee's counters may be deleted (as mentioned, this
2291 // should result in no loss of information).
2292 Worklist.push_back(x: StartBB);
2293 while (!Worklist.empty()) {
2294 auto *BB = Worklist.front();
2295 Worklist.pop_front();
2296 bool Changed = false;
2297 auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB);
2298 if (BBID) {
2299 Changed |= RewriteInstrIfNeeded(*BBID);
2300      // This may be the entry block from the inlined callee, coming into a BB
2301      // that didn't have instrumentation because of MST decisions. Make sure
2302      // it's placed accordingly. This is a no-op elsewhere.
2303 BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
2304 }
2305 for (auto &I : llvm::make_early_inc_range(Range&: *BB)) {
2306 if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) {
2307 if (isa<InstrProfIncrementInstStep>(Val: Inc)) {
2308          // Step instrumentation is used for select instructions. Inlining may
2309          // have propagated a constant that resolves the condition of the
2310          // select, in which case function cloning resolves the value of the
2311          // select and elides the select instruction. If that is the case, the
2312          // step parameter of the instrumentation will reflect that, and we can
2313          // delete the instrumentation.
2314 if (isa<Constant>(Val: Inc->getStep())) {
2315 assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
2316 Inc->eraseFromParent();
2317 } else {
2318 assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
2319 RewriteInstrIfNeeded(*Inc);
2320 }
2321 } else if (Inc != BBID) {
2322          // If we're here, it means that the BB had more than one ID, presumably
2323 // some coming from the callee. We "made up our mind" to keep the
2324 // first one (which may or may not have been originally the caller's).
2325 // All the others are superfluous and we delete them.
2326 Inc->eraseFromParent();
2327 Changed = true;
2328 }
2329 } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) {
2330 Changed |= RewriteCallsiteInsIfNeeded(*CS);
2331 }
2332 }
2333 if (!BBID || Changed)
2334 for (auto *Succ : successors(BB))
2335 if (Seen.insert(V: Succ).second)
2336 Worklist.push_back(x: Succ);
2337 }
2338
2339 assert(!llvm::is_contained(CalleeCounterMap, 0) &&
2340 "Counter index mapping should be either to -1 or to non-zero index, "
2341 "because the 0 "
2342 "index corresponds to the entry BB of the caller");
2343 assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
2344 "Callsite index mapping should be either to -1 or to non-zero index, "
2345 "because there should have been at least a callsite - the inlined one "
2346 "- which would have had a 0 index.");
2347
2348 return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
2349}
2350
2351// Inline. If successful, update the contextual profile (if a valid one is
2352// given).
2353// The contextual profile data is organized in trees, as follows:
2354// - each node corresponds to a function
2355// - the root of each tree corresponds to an "entrypoint" - e.g.
2356// RPC handler for server side
2357// - the path from the root to a node is a particular call path
2358// - the counters stored in a node are counter values observed in that
2359// particular call path ("context")
2360// - the edges between nodes are annotated with callsite IDs.
2361//
2362// Updating the contextual profile after an inlining means, at a high level,
2363// copying over the data of the callee, **intentionally without any value
2364// scaling**, and copying over the callees of the inlined callee.
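// A hedged illustration of the update (assumed profile shape): if a context
// node of the caller has a callsite edge with this CallsiteID leading to the
// callee's subtree, the callee's counters are copied, unscaled, into the
// caller's enlarged counter vector, the callee's own callsite subtrees are
// re-attached under the caller's newly allocated callsite indices, and the
// now-inlined callsite edge is erased.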
2365llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2366 PGOContextualProfile &CtxProf,
2367 bool MergeAttributes,
2368 AAResults *CalleeAAR,
2369 bool InsertLifetime,
2370 Function *ForwardVarArgsTo) {
2371 if (!CtxProf.isInSpecializedModule())
2372 return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2373 ForwardVarArgsTo);
2374
2375 auto &Caller = *CB.getCaller();
2376 auto &Callee = *CB.getCalledFunction();
2377 auto *StartBB = CB.getParent();
2378
2379 // Get some preliminary data about the callsite before it might get inlined.
2380 // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to
2381 // get this data upfront and rely less on InlineFunction's behavior.
2382 const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee);
2383 auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
2384 const auto CallsiteID =
2385 static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());
2386
2387 const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee);
2388 const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee);
2389
2390 auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
2391 ForwardVarArgsTo);
2392 if (!Ret.isSuccess())
2393 return Ret;
2394
2395 // Inlining succeeded, we don't need the instrumentation of the inlined
2396 // callsite.
2397 CallsiteIDIns->eraseFromParent();
2398
2399  // Assigning to IndicesMaps first and then taking references into it in the
2400  // lambda, because capturing structured bindings is a C++20 extension. We do
2401  // also need a capture here, though.
2402 const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf,
2403 CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites);
2404 const uint32_t NewCountersSize = CtxProf.getNumCounters(F: Caller);
2405
2406 auto Updater = [&](PGOCtxProfContext &Ctx) {
2407 assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
2408 const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps;
2409 assert(
2410 (Ctx.counters().size() +
2411 llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
2412 NewCountersSize) &&
2413 "The caller's counters size should have grown by the number of new "
2414 "distinct counters inherited from the inlined callee.");
2415 Ctx.resizeCounters(Size: NewCountersSize);
2416 // If the callsite wasn't exercised in this context, the value of the
2417 // counters coming from it is 0 - which it is right now, after resizing them
2418 // - and so we're done.
2419 auto CSIt = Ctx.callsites().find(x: CallsiteID);
2420 if (CSIt == Ctx.callsites().end())
2421 return;
2422 auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID);
2423 // The callsite was exercised, but not with this callee (so presumably this
2424 // is an indirect callsite). Again, we're done here.
2425 if (CalleeCtxIt == CSIt->second.end())
2426 return;
2427
2428 // Let's pull in the counter values and the subcontexts coming from the
2429 // inlined callee.
2430 auto &CalleeCtx = CalleeCtxIt->second;
2431 assert(CalleeCtx.guid() == CalleeGUID);
2432
2433 for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
2434 const int64_t NewIndex = CalleeCounterMap[I];
2435 if (NewIndex >= 0) {
2436 assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 "
2437 "index, that's the caller's entry BB");
2438 Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
2439 }
2440 }
2441 for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
2442 const int64_t NewCSIdx = CalleeCallsiteMap[I];
2443 if (NewCSIdx >= 0) {
2444 assert(NewCSIdx != 0 &&
2445 "callsite index mapping shouldn't happen to a 0 index, the "
2446 "caller must've had at least one callsite (with such an index)");
2447 Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet));
2448 }
2449 }
2450 // We know the traversal is preorder, so it wouldn't have yet looked at the
2451 // sub-contexts of this context that it's currently visiting. Meaning, the
2452 // erase below invalidates no iterators.
2453 auto Deleted = Ctx.callsites().erase(x: CallsiteID);
2454 assert(Deleted);
2455 (void)Deleted;
2456 };
2457 CtxProf.update(Updater, F: Caller);
2458 return Ret;
2459}
2460
2461/// This function inlines the called function into the basic block of the
2462/// caller. This returns false if it is not possible to inline this call.
2463/// The program is still in a well defined state if this occurs though.
2464///
2465/// Note that this only does one level of inlining. For example, if the
2466/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2467/// exists in the instruction stream. Similarly this will inline a recursive
2468/// function by one level.
2469llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2470 bool MergeAttributes,
2471 AAResults *CalleeAAR,
2472 bool InsertLifetime,
2473 Function *ForwardVarArgsTo,
2474 OptimizationRemarkEmitter *ORE) {
2475 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2476
2477 // FIXME: we don't inline callbr yet.
2478 if (isa<CallBrInst>(Val: CB))
2479 return InlineResult::failure(Reason: "We don't inline callbr yet.");
2480
2481 // If IFI has any state in it, zap it before we fill it in.
2482 IFI.reset();
2483
2484 Function *CalledFunc = CB.getCalledFunction();
2485 if (!CalledFunc || // Can't inline external function or indirect
2486 CalledFunc->isDeclaration()) // call!
2487 return InlineResult::failure(Reason: "external or indirect");
2488
2489 // The inliner does not know how to inline through calls with operand bundles
2490 // in general ...
2491 Value *ConvergenceControlToken = nullptr;
2492 if (CB.hasOperandBundles()) {
2493 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2494 auto OBUse = CB.getOperandBundleAt(Index: i);
2495 uint32_t Tag = OBUse.getTagID();
2496 // ... but it knows how to inline through "deopt" operand bundles ...
2497 if (Tag == LLVMContext::OB_deopt)
2498 continue;
2499 // ... and "funclet" operand bundles.
2500 if (Tag == LLVMContext::OB_funclet)
2501 continue;
2502 if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2503 continue;
2504 if (Tag == LLVMContext::OB_kcfi)
2505 continue;
2506 if (Tag == LLVMContext::OB_convergencectrl) {
2507 ConvergenceControlToken = OBUse.Inputs[0].get();
2508 continue;
2509 }
2510
2511 return InlineResult::failure(Reason: "unsupported operand bundle");
2512 }
2513 }
2514
2515 // FIXME: The check below is redundant and incomplete. According to spec, if a
2516 // convergent call is missing a token, then the caller is using uncontrolled
2517 // convergence. If the callee has an entry intrinsic, then the callee is using
2518 // controlled convergence, and the call cannot be inlined. A proper
2519  // implementation of this check requires a whole new analysis that identifies
2520 // convergence in every function. For now, we skip that and just do this one
2521 // cursory check. The underlying assumption is that in a compiler flow that
2522 // fully implements convergence control tokens, there is no mixing of
2523 // controlled and uncontrolled convergent operations in the whole program.
2524 if (CB.isConvergent()) {
2525 if (!ConvergenceControlToken &&
2526 getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) {
2527 return InlineResult::failure(
2528 Reason: "convergent call needs convergencectrl operand");
2529 }
2530 }
2531
2532 // If the call to the callee cannot throw, set the 'nounwind' flag on any
2533 // calls that we inline.
2534 bool MarkNoUnwind = CB.doesNotThrow();
2535
2536 BasicBlock *OrigBB = CB.getParent();
2537 Function *Caller = OrigBB->getParent();
2538
2539 // GC poses two hazards to inlining, which only occur when the callee has GC:
2540 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2541 // caller.
2542 // 2. If the caller has a differing GC, it is invalid to inline.
2543 if (CalledFunc->hasGC()) {
2544 if (!Caller->hasGC())
2545 Caller->setGC(CalledFunc->getGC());
2546 else if (CalledFunc->getGC() != Caller->getGC())
2547 return InlineResult::failure(Reason: "incompatible GC");
2548 }
2549
2550 // Get the personality function from the callee if it contains a landing pad.
2551 Constant *CalledPersonality =
2552 CalledFunc->hasPersonalityFn()
2553 ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2554 : nullptr;
2555
2556 // Find the personality function used by the landing pads of the caller. If it
2557 // exists, then check to see that it matches the personality function used in
2558 // the callee.
2559 Constant *CallerPersonality =
2560 Caller->hasPersonalityFn()
2561 ? Caller->getPersonalityFn()->stripPointerCasts()
2562 : nullptr;
2563 if (CalledPersonality) {
2564 if (!CallerPersonality)
2565 Caller->setPersonalityFn(CalledPersonality);
2566 // If the personality functions match, then we can perform the
2567 // inlining. Otherwise, we can't inline.
2568 // TODO: This isn't 100% true. Some personality functions are proper
2569 // supersets of others and can be used in place of the other.
2570 else if (CalledPersonality != CallerPersonality)
2571 return InlineResult::failure(Reason: "incompatible personality");
2572 }
2573
2574 // We need to figure out which funclet the callsite was in so that we may
2575 // properly nest the callee.
2576 Instruction *CallSiteEHPad = nullptr;
2577 if (CallerPersonality) {
2578 EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
2579 if (isScopedEHPersonality(Pers: Personality)) {
2580 std::optional<OperandBundleUse> ParentFunclet =
2581 CB.getOperandBundle(ID: LLVMContext::OB_funclet);
2582 if (ParentFunclet)
2583 CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());
2584
2585 // OK, the inlining site is legal. What about the target function?
2586
2587 if (CallSiteEHPad) {
2588 if (Personality == EHPersonality::MSVC_CXX) {
2589 // The MSVC personality cannot tolerate catches getting inlined into
2590 // cleanup funclets.
2591 if (isa<CleanupPadInst>(Val: CallSiteEHPad)) {
2592 // Ok, the call site is within a cleanuppad. Let's check the callee
2593 // for catchpads.
2594 for (const BasicBlock &CalledBB : *CalledFunc) {
2595 if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt()))
2596 return InlineResult::failure(Reason: "catch in cleanup funclet");
2597 }
2598 }
2599 } else if (isAsynchronousEHPersonality(Pers: Personality)) {
2600          // SEH is even less tolerant; there may not be any sort of exceptional
2601          // funclet in the callee.
2602 for (const BasicBlock &CalledBB : *CalledFunc) {
2603 if (CalledBB.isEHPad())
2604 return InlineResult::failure(Reason: "SEH in cleanup funclet");
2605 }
2606 }
2607 }
2608 }
2609 }
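// For illustration, a call site nested in a cleanup funclet looks like:
//
//   %cp = cleanuppad within none []
//   call void @callee() [ "funclet"(token %cp) ]
//
// With the MSVC C++ personality, a callee containing catchswitch/catchpad
// blocks cannot be inlined at such a site; with an asynchronous (SEH)
// personality, no EH pads at all are tolerated in the callee, as checked
// above.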
2610
2611 // Determine if we are dealing with a call in an EHPad which does not unwind
2612 // to caller.
2613 bool EHPadForCallUnwindsLocally = false;
2614 if (CallSiteEHPad && isa<CallInst>(Val: CB)) {
2615 UnwindDestMemoTy FuncletUnwindMap;
2616 Value *CallSiteUnwindDestToken =
2617 getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2618
2619 EHPadForCallUnwindsLocally =
2620 CallSiteUnwindDestToken &&
2621 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2622 }
2623
2624 // Get an iterator to the last basic block in the function, which will have
2625 // the new function inlined after it.
2626 Function::iterator LastBlock = --Caller->end();
2627
2628 // Make sure to capture all of the return instructions from the cloned
2629 // function.
2630 SmallVector<ReturnInst*, 8> Returns;
2631 ClonedCodeInfo InlinedFunctionInfo;
2632 Function::iterator FirstNewBlock;
2633
2634 { // Scope to destroy VMap after cloning.
2635 ValueToValueMapTy VMap;
2636 struct ByValInit {
2637 Value *Dst;
2638 Value *Src;
2639 MaybeAlign SrcAlign;
2640 Type *Ty;
2641 };
2642 // Keep a list of (dst, src, src_align, byval type) records to emit byval
2643 // initializations. The source alignment is only available through the call
2644 // base, so it has to be saved here.
2645 SmallVector<ByValInit, 4> ByValInits;
2646
2647 // When inlining a function that contains noalias scope metadata,
2648 // this metadata needs to be cloned so that the inlined blocks
2649 // have different "unique scopes" at every call site.
2650 // Track the metadata that must be cloned. Do this before other changes to
2651 // the function, so that we do not get in trouble when inlining caller ==
2652 // callee.
2653 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2654
2655 auto &DL = Caller->getDataLayout();
2656
2657 // Calculate the vector of arguments to pass into the function cloner, which
2658 // matches up the formal to the actual argument values.
2659 auto AI = CB.arg_begin();
2660 unsigned ArgNo = 0;
2661 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2662 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2663 Value *ActualArg = *AI;
2664
2665 // When byval arguments are actually inlined, we need to make the copy implied
2666 // by them explicit. However, we don't do this if the callee is readonly
2667 // or readnone, because the copy would be unneeded: the callee doesn't
2668 // modify the struct.
2669 if (CB.isByValArgument(ArgNo)) {
2670 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2671 TheCall: &CB, CalledFunc, IFI,
2672 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2673 if (ActualArg != *AI)
2674 ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI,
2675 .SrcAlign: CB.getParamAlign(ArgNo),
2676 .Ty: CB.getParamByValType(ArgNo)});
2677 }
2678
2679 VMap[&*I] = ActualArg;
2680 }
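// Illustrative sketch of the byval handling above (names and sizes invented):
// for a parameter declared `ptr byval(%struct.S) %p`, HandleByValArgument may
// introduce a caller-local alloca, and the recorded ByValInit later expands
// into an explicit copy in the first inlined block, roughly:
//
//   %p.copy = alloca %struct.S, align 8
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %p.copy,
//                                    ptr align 8 %p, i64 <size>, i1 false)
//
// so the inlined body behaves as if it still owned a private copy.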
2681
2682 // TODO: Remove this when users have been updated to the assume bundles.
2683 // Add alignment assumptions if necessary. We do this before the inlined
2684 // instructions are actually cloned into the caller so that we can easily
2685 // check what will be known at the start of the inlined code.
2686 AddAlignmentAssumptions(CB, IFI);
2687
2688 AssumptionCache *AC =
2689 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2690
2691 // Preserve all attributes of the call and its parameters.
2692 salvageKnowledge(I: &CB, AC);
2693
2694 // We want the inliner to prune the code as it copies. We would LOVE to
2695 // have no dead or constant instructions leftover after inlining occurs
2696 // (which can happen, e.g., because an argument was constant), but we'll be
2697 // happy with whatever the cloner can do.
2698 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2699 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2700 CodeInfo: &InlinedFunctionInfo);
2701 // Remember the first block that is newly cloned over.
2702 FirstNewBlock = LastBlock; ++FirstNewBlock;
2703
2704 // Insert retainRV/claimRV runtime calls.
2705 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2706 if (RVCallKind != objcarc::ARCInstKind::None)
2707 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2708
2709 // Update caller/callee profiles only when requested. For sample loader
2710 // inlining, the context-sensitive inlinee profile doesn't need to be
2711 // subtracted from the callee profile, and the inlined clone also doesn't
2712 // need to be scaled based on the call site count.
2713 if (IFI.UpdateProfile) {
2714 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2715 // Update the BFI of blocks cloned into the caller.
2716 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2717 CalleeEntryBlock: CalledFunc->front());
2718
2719 if (auto Profile = CalledFunc->getEntryCount())
2720 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2721 CallerBFI: IFI.CallerBFI);
2722 }
2723
2724 // Inject byval arguments initialization.
2725 for (ByValInit &Init : ByValInits)
2726 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign,
2727 M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI,
2728 CalledFunc);
2729
2730 std::optional<OperandBundleUse> ParentDeopt =
2731 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2732 if (ParentDeopt) {
2733 SmallVector<OperandBundleDef, 2> OpDefs;
2734
2735 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2736 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2737 if (!ICS)
2738 continue; // instruction was DCE'd or RAUW'ed to undef
2739
2740 OpDefs.clear();
2741
2742 OpDefs.reserve(N: ICS->getNumOperandBundles());
2743
2744 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2745 ++COBi) {
2746 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2747 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2748 // If the inlined call has other operand bundles, let them be
2749 OpDefs.emplace_back(Args&: ChildOB);
2750 continue;
2751 }
2752
2753 // It may be useful to separate this logic (of handling operand
2754 // bundles) out to a separate "policy" component if this gets crowded.
2755 // Prepend the parent's deoptimization continuation to the newly
2756 // inlined call's deoptimization continuation.
2757 std::vector<Value *> MergedDeoptArgs;
2758 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2759 ChildOB.Inputs.size());
2760
2761 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2762 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2763
2764 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2765 }
2766
2767 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2768
2769 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2770 // this even if the call returns void.
2771 ICS->replaceAllUsesWith(V: NewI);
2772
2773 VH = nullptr;
2774 ICS->eraseFromParent();
2775 }
2776 }
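// Example of the merge performed above (operands are illustrative):
//
//   caller site:   call void @f() [ "deopt"(i32 10) ]
//   cloned site:   call void @g() [ "deopt"(i32 20, i32 21) ]
//   rewritten as:  call void @g() [ "deopt"(i32 10, i32 20, i32 21) ]
//
// The parent's deoptimization state is prepended to the inlined call's own
// state.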
2777
2778 // For 'nodebug' functions, the associated DISubprogram is always null.
2779 // Conservatively avoid propagating the callsite debug location to
2780 // instructions inlined from a function whose DISubprogram is not null.
2781 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2782 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2783
2784 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2785 // Interpret inlined stores to caller-local variables as assignments.
2786 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2787
2788 // Update DIAssignID metadata attachments and uses so that they are
2789 // unique to this inlined instance.
2790 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2791 }
2792
2793 // Now clone the inlined noalias scope metadata.
2794 SAMetadataCloner.clone();
2795 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2796
2797 // Add noalias metadata if necessary.
2798 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2799
2800 // Clone return attributes on the callsite into the calls within the inlined
2801 // function which feed into its return value.
2802 AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2803
2804 // Clone attributes on the params of the callsite to calls within the
2805 // inlined function which use the same param.
2806 AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2807
2808 propagateMemProfMetadata(
2809 Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
2810
2811 // Propagate metadata on the callsite if necessary.
2812 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2813
2814 // Register any cloned assumptions.
2815 if (IFI.GetAssumptionCache)
2816 for (BasicBlock &NewBlock :
2817 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2818 for (Instruction &I : NewBlock)
2819 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2820 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2821 }
2822
2823 if (ConvergenceControlToken) {
2824 IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock);
2825 if (IntrinsicCall) {
2826 IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken);
2827 IntrinsicCall->eraseFromParent();
2828 }
2829 }
2830
2831 // If there are any alloca instructions in the block that used to be the entry
2832 // block for the callee, move them to the entry block of the caller. First
2833 // calculate which instruction they should be inserted before. We insert the
2834 // instructions at the end of the current alloca list.
2835 {
2836 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2837 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2838 E = FirstNewBlock->end(); I != E; ) {
2839 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
2840 if (!AI) continue;
2841
2842 // If the alloca is now dead, remove it. This often occurs due to code
2843 // specialization.
2844 if (AI->use_empty()) {
2845 AI->eraseFromParent();
2846 continue;
2847 }
2848
2849 if (!allocaWouldBeStaticInEntry(AI))
2850 continue;
2851
2852 // Keep track of the static allocas that we inline into the caller.
2853 IFI.StaticAllocas.push_back(Elt: AI);
2854
2855 // Scan for the block of allocas that we can move over, and move them
2856 // all at once.
2857 while (isa<AllocaInst>(Val: I) &&
2858 !cast<AllocaInst>(Val&: I)->use_empty() &&
2859 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
2860 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
2861 ++I;
2862 }
2863
2864 // Transfer all of the allocas over in a block. Using splice means
2865 // that the instructions aren't removed from the symbol table, then
2866 // reinserted.
2867 I.setTailBit(true);
2868 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
2869 FromBeginIt: AI->getIterator(), FromEndIt: I);
2870 }
2871 }
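// Net effect (illustrative): a static alloca from the callee's entry block,
// e.g. `%buf = alloca [64 x i8]`, now sits at the end of the caller's
// entry-block alloca list, so later passes still treat it as a static stack
// slot rather than a dynamic allocation.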
2872
2873 SmallVector<Value*,4> VarArgsToForward;
2874 SmallVector<AttributeSet, 4> VarArgsAttrs;
2875 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2876 i < CB.arg_size(); i++) {
2877 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
2878 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
2879 }
2880
2881 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2882 if (InlinedFunctionInfo.ContainsCalls) {
2883 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2884 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
2885 CallSiteTailKind = CI->getTailCallKind();
2886
2887 // For inlining purposes, the "notail" marker is the same as no marker.
2888 if (CallSiteTailKind == CallInst::TCK_NoTail)
2889 CallSiteTailKind = CallInst::TCK_None;
2890
2891 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2892 ++BB) {
2893 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
2894 CallInst *CI = dyn_cast<CallInst>(Val: &I);
2895 if (!CI)
2896 continue;
2897
2898 // Forward varargs from inlined call site to calls to the
2899 // ForwardVarArgsTo function, if requested, and to musttail calls.
2900 if (!VarArgsToForward.empty() &&
2901 ((ForwardVarArgsTo &&
2902 CI->getCalledFunction() == ForwardVarArgsTo) ||
2903 CI->isMustTailCall())) {
2904 // Collect attributes for non-vararg parameters.
2905 AttributeList Attrs = CI->getAttributes();
2906 SmallVector<AttributeSet, 8> ArgAttrs;
2907 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2908 for (unsigned ArgNo = 0;
2909 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2910 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
2911 }
2912
2913 // Add VarArg attributes.
2914 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
2915 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
2916 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
2917 // Add VarArgs to existing parameters.
2918 SmallVector<Value *, 6> Params(CI->args());
2919 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
2920 CallInst *NewCI = CallInst::Create(
2921 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
2922 NewCI->setDebugLoc(CI->getDebugLoc());
2923 NewCI->setAttributes(Attrs);
2924 NewCI->setCallingConv(CI->getCallingConv());
2925 CI->replaceAllUsesWith(V: NewCI);
2926 CI->eraseFromParent();
2927 CI = NewCI;
2928 }
2929
2930 if (Function *F = CI->getCalledFunction())
2931 InlinedDeoptimizeCalls |=
2932 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2933
2934 // We need to reduce the strength of any inlined tail calls. For
2935 // musttail, we have to avoid introducing potential unbounded stack
2936 // growth. For example, if functions 'f' and 'g' are mutually recursive
2937 // with musttail, we can inline 'g' into 'f' so long as we preserve
2938 // musttail on the cloned call to 'f'. If either the inlined call site
2939 // or the cloned call site is *not* musttail, the program already has
2940 // one frame of stack growth, so it's safe to remove musttail. Here is
2941 // a table of example transformations:
2942 //
2943 // f -> musttail g -> musttail f ==> f -> musttail f
2944 // f -> musttail g -> tail f ==> f -> tail f
2945 // f -> g -> musttail f ==> f -> f
2946 // f -> g -> tail f ==> f -> f
2947 //
2948 // Inlined notail calls should remain notail calls.
2949 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2950 if (ChildTCK != CallInst::TCK_NoTail)
2951 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
2952 CI->setTailCallKind(ChildTCK);
2953 InlinedMustTailCalls |= CI->isMustTailCall();
2954
2955 // Call sites inlined through a 'nounwind' call site should be
2956 // 'nounwind' as well. However, avoid marking call sites explicitly
2957 // where possible. This helps expose more opportunities for CSE after
2958 // inlining, commonly when the callee is an intrinsic.
2959 if (MarkNoUnwind && !CI->doesNotThrow())
2960 CI->setDoesNotThrow();
2961 }
2962 }
2963 }
2964
2965 // Leave lifetime markers for the static allocas, scoping them to the
2966 // function we just inlined.
2967 // We need to insert lifetime intrinsics even at O0 to avoid invalid
2968 // access caused by multithreaded coroutines. The check
2969 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
2970 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
2971 !IFI.StaticAllocas.empty()) {
2972 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
2973 for (AllocaInst *AI : IFI.StaticAllocas) {
2974 // Don't mark swifterror allocas. They can't have bitcast uses.
2975 if (AI->isSwiftError())
2976 continue;
2977
2978 // If the alloca is already scoped to something smaller than the whole
2979 // function then there's no need to add redundant, less accurate markers.
2980 if (hasLifetimeMarkers(AI))
2981 continue;
2982
2983 // Try to determine the size of the allocation.
2984 ConstantInt *AllocaSize = nullptr;
2985 if (ConstantInt *AIArraySize =
2986 dyn_cast<ConstantInt>(Val: AI->getArraySize())) {
2987 auto &DL = Caller->getDataLayout();
2988 Type *AllocaType = AI->getAllocatedType();
2989 TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType);
2990 uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
2991
2992 // Don't add markers for zero-sized allocas.
2993 if (AllocaArraySize == 0)
2994 continue;
2995
2996 // Check that array size doesn't saturate uint64_t and doesn't
2997 // overflow when it's multiplied by type size.
2998 if (!AllocaTypeSize.isScalable() &&
2999 AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
3000 std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
3001 AllocaTypeSize.getFixedValue()) {
3002 AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()),
3003 V: AllocaArraySize * AllocaTypeSize);
3004 }
3005 }
3006
3007 builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize);
3008 for (ReturnInst *RI : Returns) {
3009 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
3010 // call and a return. The return kills all local allocas.
3011 if (InlinedMustTailCalls &&
3012 RI->getParent()->getTerminatingMustTailCall())
3013 continue;
3014 if (InlinedDeoptimizeCalls &&
3015 RI->getParent()->getTerminatingDeoptimizeCall())
3016 continue;
3017 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize);
3018 }
3019 }
3020 }
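// Sketch of the markers emitted above for a fixed-size alloca (the size and
// names are illustrative):
//
//   call void @llvm.lifetime.start.p0(i64 64, ptr %buf)
//   ...inlined body...
//   call void @llvm.lifetime.end.p0(i64 64, ptr %buf)
//
// with the end marker placed before each surviving return, except returns
// already terminated by musttail or deoptimize calls.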
3021
3022 // If the inlined code contained dynamic alloca instructions, wrap the inlined
3023 // code with llvm.stacksave/llvm.stackrestore intrinsics.
3024 if (InlinedFunctionInfo.ContainsDynamicAllocas) {
3025 // Insert the llvm.stacksave.
3026 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
3027 .CreateStackSave(Name: "savedstack");
3028
3029 // Insert a call to llvm.stackrestore before any return instructions in the
3030 // inlined function.
3031 for (ReturnInst *RI : Returns) {
3032 // Don't insert llvm.stackrestore calls between a musttail or deoptimize
3033 // call and a return. The return will restore the stack pointer.
3034 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
3035 continue;
3036 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
3037 continue;
3038 IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr);
3039 }
3040 }
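// Roughly (illustrative IR), the inlined region then becomes:
//
//   %savedstack = call ptr @llvm.stacksave.p0()
//   ...inlined code containing dynamic allocas...
//   call void @llvm.stackrestore.p0(ptr %savedstack)
//
// so dynamic allocas from the callee do not accumulate in the caller's frame
// across repeated executions of this code.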
3041
3042 // If we are inlining for an invoke instruction, we must make sure to rewrite
3043 // any call instructions into invoke instructions. This is sensitive to which
3044 // funclet pads were top-level in the inlinee, so must be done before
3045 // rewriting the "parent pad" links.
3046 if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) {
3047 BasicBlock *UnwindDest = II->getUnwindDest();
3048 BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt();
3049 if (isa<LandingPadInst>(Val: FirstNonPHI)) {
3050 HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3051 } else {
3052 HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
3053 }
3054 }
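// Illustrative example: when inlining through
//
//   invoke void @callee() to label %cont unwind label %lpad
//
// a plain `call void @may_throw()` cloned from the callee (names invented) is
// rewritten into an invoke whose unwind edge targets %lpad, or the
// appropriate funclet unwind destination, so exceptions keep flowing to the
// original handler.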
3055
3056 // Update the lexical scopes of the new funclets and callsites.
3057 // Anything that had 'none' as its parent is now nested inside the callsite's
3058 // EHPad.
3059 if (CallSiteEHPad) {
3060 for (Function::iterator BB = FirstNewBlock->getIterator(),
3061 E = Caller->end();
3062 BB != E; ++BB) {
3063 // Add bundle operands to inlined call sites.
3064 PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad);
3065
3066 // It is problematic if the inlinee has a cleanupret that unwinds to the
3067 // caller and we inline it at a call site that does not itself unwind to the
3068 // caller but sits inside an EH pad that unwinds locally. Such an edge must
3069 // be dynamically unreachable, so we replace the cleanupret with unreachable.
3070 if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator()))
3071 if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
3072 changeToUnreachable(I: CleanupRet);
3073
3074 BasicBlock::iterator I = BB->getFirstNonPHIIt();
3075 if (!I->isEHPad())
3076 continue;
3077
3078 if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) {
3079 if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad()))
3080 CatchSwitch->setParentPad(CallSiteEHPad);
3081 } else {
3082 auto *FPI = cast<FuncletPadInst>(Val&: I);
3083 if (isa<ConstantTokenNone>(Val: FPI->getParentPad()))
3084 FPI->setParentPad(CallSiteEHPad);
3085 }
3086 }
3087 }
3088
3089 if (InlinedDeoptimizeCalls) {
3090 // We need to at least remove the deoptimizing returns from the Returns set,
3091 // so that the control flow from those returns does not get merged into the
3092 // caller (but terminates it instead). If the caller's return type does not
3093 // match the callee's return type, we also need to change the return type of
3094 // the intrinsic.
3095 if (Caller->getReturnType() == CB.getType()) {
3096 llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) {
3097 return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
3098 });
3099 } else {
3100 SmallVector<ReturnInst *, 8> NormalReturns;
3101 Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration(
3102 M: Caller->getParent(), id: Intrinsic::experimental_deoptimize,
3103 Tys: {Caller->getReturnType()});
3104
3105 for (ReturnInst *RI : Returns) {
3106 CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
3107 if (!DeoptCall) {
3108 NormalReturns.push_back(Elt: RI);
3109 continue;
3110 }
3111
3112 // The calling convention on the deoptimize call itself may be bogus,
3113 // since the code we're inlining may have undefined behavior (and may
3114 // never actually execute at runtime); but all
3115 // @llvm.experimental.deoptimize declarations have to have the same
3116 // calling convention in a well-formed module.
3117 auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
3118 NewDeoptIntrinsic->setCallingConv(CallingConv);
3119 auto *CurBB = RI->getParent();
3120 RI->eraseFromParent();
3121
3122 SmallVector<Value *, 4> CallArgs(DeoptCall->args());
3123
3124 SmallVector<OperandBundleDef, 1> OpBundles;
3125 DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles);
3126 auto DeoptAttributes = DeoptCall->getAttributes();
3127 DeoptCall->eraseFromParent();
3128 assert(!OpBundles.empty() &&
3129 "Expected at least the deopt operand bundle");
3130
3131 IRBuilder<> Builder(CurBB);
3132 CallInst *NewDeoptCall =
3133 Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles);
3134 NewDeoptCall->setCallingConv(CallingConv);
3135 NewDeoptCall->setAttributes(DeoptAttributes);
3136 if (NewDeoptCall->getType()->isVoidTy())
3137 Builder.CreateRetVoid();
3138 else
3139 Builder.CreateRet(V: NewDeoptCall);
3140 // Since the ret type is changed, remove the incompatible attributes.
3141 NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible(
3142 Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes()));
3143 }
3144
3145 // Leave behind the normal returns so we can merge control flow.
3146 std::swap(LHS&: Returns, RHS&: NormalReturns);
3147 }
3148 }
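// For instance (types illustrative), if the caller returns i32 while the
// inlined deoptimize call was declared with a different return type, the
// block now ends as:
//
//   %dv = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(...) ]
//   ret i32 %dv
//
// where the "deopt" operands are those salvaged from the original call.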
3149
3150 // Handle any inlined musttail call sites. In order for a new call site to be
3151 // musttail, the source of the clone and the inlined call site must have been
3152 // musttail. Therefore it's safe to return without merging control into the
3153 // phi below.
3154 if (InlinedMustTailCalls) {
3155 // Check if we need to bitcast the result of any musttail calls.
3156 Type *NewRetTy = Caller->getReturnType();
3157 bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;
3158
3159 // Handle the returns preceded by musttail calls separately.
3160 SmallVector<ReturnInst *, 8> NormalReturns;
3161 for (ReturnInst *RI : Returns) {
3162 CallInst *ReturnedMustTail =
3163 RI->getParent()->getTerminatingMustTailCall();
3164 if (!ReturnedMustTail) {
3165 NormalReturns.push_back(Elt: RI);
3166 continue;
3167 }
3168 if (!NeedBitCast)
3169 continue;
3170
3171 // Delete the old return and any preceding bitcast.
3172 BasicBlock *CurBB = RI->getParent();
3173 auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue());
3174 RI->eraseFromParent();
3175 if (OldCast)
3176 OldCast->eraseFromParent();
3177
3178 // Insert a new bitcast and return with the right type.
3179 IRBuilder<> Builder(CurBB);
3180 Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy));
3181 }
3182
3183 // Leave behind the normal returns so we can merge control flow.
3184 std::swap(LHS&: Returns, RHS&: NormalReturns);
3185 }
3186
3187 // Now that all of the transforms on the inlined code have taken place but
3188 // before we splice the inlined code into the CFG and lose track of which
3189 // blocks were actually inlined, collect the call sites. We only do this if
3190 // call graph updates weren't requested, as those provide value handle based
3191 // tracking of inlined call sites instead. Calls to intrinsics are not
3192 // collected because they are not inlineable.
3193 if (InlinedFunctionInfo.ContainsCalls) {
3194 // Collect the raw call sites that were inlined.
3195 for (BasicBlock &NewBB :
3196 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
3197 for (Instruction &I : NewBB)
3198 if (auto *CB = dyn_cast<CallBase>(Val: &I))
3199 if (!(CB->getCalledFunction() &&
3200 CB->getCalledFunction()->isIntrinsic()))
3201 IFI.InlinedCallSites.push_back(Elt: CB);
3202 }
3203
3204 // If we cloned in _exactly one_ basic block, and if that block ends in a
3205 // return instruction, we splice the body of the inlined callee directly into
3206 // the calling basic block.
3207 if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) {
3208 // Move all of the instructions right before the call.
3209 OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(),
3210 FromEndIt: FirstNewBlock->end());
3211 // Remove the cloned basic block.
3212 Caller->back().eraseFromParent();
3213
3214 // If the call site was an invoke instruction, add a branch to the normal
3215 // destination.
3216 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3217 BranchInst *NewBr = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator());
3218 NewBr->setDebugLoc(Returns[0]->getDebugLoc());
3219 }
3220
3221 // If the return instruction returned a value, replace uses of the call with
3222 // uses of the returned value.
3223 if (!CB.use_empty()) {
3224 ReturnInst *R = Returns[0];
3225 if (&CB == R->getReturnValue())
3226 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3227 else
3228 CB.replaceAllUsesWith(V: R->getReturnValue());
3229 }
3230 // Since we are now done with the Call/Invoke, we can delete it.
3231 CB.eraseFromParent();
3232
3233 // Since we are now done with the return instruction, delete it also.
3234 Returns[0]->eraseFromParent();
3235
3236 if (MergeAttributes)
3237 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3238
3239 // We are now done with the inlining.
3240 return InlineResult::success();
3241 }
3242
3243 // Otherwise, we have the normal case of more than one block to inline or
3244 // multiple return sites.
3245
3246 // We want to clone the entire callee function into the hole between the
3247 // "starter" and "ender" blocks. How we accomplish this depends on whether
3248 // this is an invoke instruction or a call instruction.
3249 BasicBlock *AfterCallBB;
3250 BranchInst *CreatedBranchToNormalDest = nullptr;
3251 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
3252
3253 // Add an unconditional branch to make this look like the CallInst case...
3254 CreatedBranchToNormalDest = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator());
3255 // We intend to replace this DebugLoc with another later.
3256 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary());
3257
3258 // Split the basic block. This guarantees that no PHI nodes will have to be
3259 // updated due to new incoming edges, and makes the invoke case more
3260 // symmetric to the call case.
3261 AfterCallBB =
3262 OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(),
3263 BBName: CalledFunc->getName() + ".exit");
3264
3265 } else { // It's a call
3266 // If this is a call instruction, we need to split the basic block that
3267 // the call lives in.
3268 //
3269 AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(),
3270 BBName: CalledFunc->getName() + ".exit");
3271 }
3272
3273 if (IFI.CallerBFI) {
3274 // Copy original BB's block frequency to AfterCallBB
3275 IFI.CallerBFI->setBlockFreq(BB: AfterCallBB,
3276 Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB));
3277 }
3278
3279 // Change the branch that used to go to AfterCallBB to branch to the first
3280 // basic block of the inlined function.
3281 //
3282 Instruction *Br = OrigBB->getTerminator();
3283 assert(Br && Br->getOpcode() == Instruction::Br &&
3284 "splitBasicBlock broken!");
3285 Br->setOperand(i: 0, Val: &*FirstNewBlock);
3286
3287 // Now that the function is correct, make it a little bit nicer. In
3288 // particular, move the basic blocks inserted from the end of the function
3289 // into the space made by splitting the source basic block.
3290 Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock,
3291 FromEndIt: Caller->end());
3292
3293 // Handle all of the return instructions that we just cloned in, and eliminate
3294 // any users of the original call/invoke instruction.
3295 Type *RTy = CalledFunc->getReturnType();
3296
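// When the callee had several returns, the merged control flow built below
// ends up looking like this (illustrative names):
//
//   callee.exit:                      ; the AfterCallBB split off above
//     %r = phi i32 [ %a, %ret.bb1 ], [ %b, %ret.bb2 ]
//     ...code that originally followed the call site...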
3297 PHINode *PHI = nullptr;
3298 if (Returns.size() > 1) {
3299 // The PHI node should go at the front of the new basic block to merge all
3300 // possible incoming values.
3301 if (!CB.use_empty()) {
3302 PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName());
3303 PHI->insertBefore(InsertPos: AfterCallBB->begin());
3304 // Anything that used the result of the function call should now use the
3305 // PHI node as their operand.
3306 CB.replaceAllUsesWith(V: PHI);
3307 }
3308
3309 // Loop over all of the return instructions adding entries to the PHI node
3310 // as appropriate.
3311 if (PHI) {
3312 for (ReturnInst *RI : Returns) {
3313 assert(RI->getReturnValue()->getType() == PHI->getType() &&
3314 "Ret value not consistent in function!");
3315 PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent());
3316 }
3317 }
3318
3319 // Add a branch to the merge points and remove return instructions.
3320 DebugLoc Loc;
3321 for (ReturnInst *RI : Returns) {
3322 BranchInst *BI = BranchInst::Create(IfTrue: AfterCallBB, InsertBefore: RI->getIterator());
3323 Loc = RI->getDebugLoc();
3324 BI->setDebugLoc(Loc);
3325 RI->eraseFromParent();
3326 }
3327 // We need to set the debug location to *somewhere* inside the
3328 // inlined function. The line number may be nonsensical, but the
3329 // instruction will at least be associated with the right
3330 // function.
3331 if (CreatedBranchToNormalDest)
3332 CreatedBranchToNormalDest->setDebugLoc(Loc);
3333 } else if (!Returns.empty()) {
3334 // Otherwise, if there is exactly one return value, just replace anything
3335 // using the return value of the call with the computed value.
3336 if (!CB.use_empty()) {
3337 if (&CB == Returns[0]->getReturnValue())
3338 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3339 else
3340 CB.replaceAllUsesWith(V: Returns[0]->getReturnValue());
3341 }
3342
3343 // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
3344 BasicBlock *ReturnBB = Returns[0]->getParent();
3345 ReturnBB->replaceAllUsesWith(V: AfterCallBB);
3346
3347 // Splice the code from the return block into the block that it will return
3348 // to, which contains the code that was after the call.
3349 AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB);
3350
3351 if (CreatedBranchToNormalDest)
3352 CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
3353
3354 // Delete the return instruction and then the now-empty ReturnBB.
3355 Returns[0]->eraseFromParent();
3356 ReturnBB->eraseFromParent();
3357 } else if (!CB.use_empty()) {
3358 // In this case there are no returns to use, so there is no clear source
3359 // location for the "return".
3360 // FIXME: It may be correct to use the scope end line of the function here,
3361 // since this likely means we are falling out of the function.
3362 if (CreatedBranchToNormalDest)
3363 CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown());
3364 // No returns, but something is using the return value of the call. Just
3365 // nuke the result.
3366 CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType()));
3367 }
3368
3369 // Since we are now done with the Call/Invoke, we can delete it.
3370 CB.eraseFromParent();
3371
3372 // If we inlined any musttail calls and the original return is now
3373 // unreachable, delete it. It can only contain a bitcast and ret.
3374 if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB))
3375 AfterCallBB->eraseFromParent();
3376
3377 // We should always be able to fold the entry block of the function into the
3378 // single predecessor of the block...
3379 assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
3380 BasicBlock *CalleeEntry = cast<BranchInst>(Val: Br)->getSuccessor(i: 0);
3381
3382 // Splice the code entry block into calling block, right before the
3383 // unconditional branch.
3384 CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes
3385 OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry);
3386
3387 // Remove the unconditional branch.
3388 Br->eraseFromParent();
3389
3390 // Now we can remove the CalleeEntry block, which is now empty.
3391 CalleeEntry->eraseFromParent();
3392
3393 // If we inserted a phi node, check to see if it has a single value (e.g. all
3394 // the entries are the same or undef). If so, remove the PHI so it doesn't
3395 // block other optimizations.
3396 if (PHI) {
3397 AssumptionCache *AC =
3398 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
3399 auto &DL = Caller->getDataLayout();
3400 if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) {
3401 PHI->replaceAllUsesWith(V);
3402 PHI->eraseFromParent();
3403 }
3404 }
3405
3406 if (MergeAttributes)
3407 AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc);
3408
3409 return InlineResult::success();
3410}
3411