1 | //===- InlineFunction.cpp - Code to perform function inlining -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements inlining of a function into a call site, resolving |
10 | // parameters and the return value as appropriate. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/DenseMap.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/SetVector.h" |
17 | #include "llvm/ADT/SmallPtrSet.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/iterator_range.h" |
21 | #include "llvm/Analysis/AliasAnalysis.h" |
22 | #include "llvm/Analysis/AssumptionCache.h" |
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
24 | #include "llvm/Analysis/CallGraph.h" |
25 | #include "llvm/Analysis/CaptureTracking.h" |
26 | #include "llvm/Analysis/CtxProfAnalysis.h" |
27 | #include "llvm/Analysis/IndirectCallVisitor.h" |
28 | #include "llvm/Analysis/InstructionSimplify.h" |
29 | #include "llvm/Analysis/MemoryProfileInfo.h" |
30 | #include "llvm/Analysis/ObjCARCAnalysisUtils.h" |
31 | #include "llvm/Analysis/ObjCARCUtil.h" |
32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
33 | #include "llvm/Analysis/ValueTracking.h" |
34 | #include "llvm/Analysis/VectorUtils.h" |
35 | #include "llvm/IR/Argument.h" |
36 | #include "llvm/IR/AttributeMask.h" |
37 | #include "llvm/IR/Attributes.h" |
38 | #include "llvm/IR/BasicBlock.h" |
39 | #include "llvm/IR/CFG.h" |
40 | #include "llvm/IR/Constant.h" |
41 | #include "llvm/IR/ConstantRange.h" |
42 | #include "llvm/IR/Constants.h" |
43 | #include "llvm/IR/DataLayout.h" |
44 | #include "llvm/IR/DebugInfo.h" |
45 | #include "llvm/IR/DebugInfoMetadata.h" |
46 | #include "llvm/IR/DebugLoc.h" |
47 | #include "llvm/IR/DerivedTypes.h" |
48 | #include "llvm/IR/Dominators.h" |
49 | #include "llvm/IR/EHPersonalities.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/GlobalVariable.h" |
52 | #include "llvm/IR/IRBuilder.h" |
53 | #include "llvm/IR/InlineAsm.h" |
54 | #include "llvm/IR/InstrTypes.h" |
55 | #include "llvm/IR/Instruction.h" |
56 | #include "llvm/IR/Instructions.h" |
57 | #include "llvm/IR/IntrinsicInst.h" |
58 | #include "llvm/IR/Intrinsics.h" |
59 | #include "llvm/IR/LLVMContext.h" |
60 | #include "llvm/IR/MDBuilder.h" |
61 | #include "llvm/IR/Metadata.h" |
62 | #include "llvm/IR/Module.h" |
63 | #include "llvm/IR/PatternMatch.h" |
64 | #include "llvm/IR/ProfDataUtils.h" |
65 | #include "llvm/IR/Type.h" |
66 | #include "llvm/IR/User.h" |
67 | #include "llvm/IR/Value.h" |
68 | #include "llvm/Support/Casting.h" |
69 | #include "llvm/Support/CommandLine.h" |
70 | #include "llvm/Support/ErrorHandling.h" |
71 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
72 | #include "llvm/Transforms/Utils/Cloning.h" |
73 | #include "llvm/Transforms/Utils/Local.h" |
74 | #include "llvm/Transforms/Utils/ValueMapper.h" |
75 | #include <algorithm> |
76 | #include <cassert> |
77 | #include <cstdint> |
78 | #include <deque> |
79 | #include <iterator> |
80 | #include <limits> |
81 | #include <optional> |
82 | #include <string> |
83 | #include <utility> |
84 | #include <vector> |
85 | |
86 | #define DEBUG_TYPE "inline-function" |
87 | |
88 | using namespace llvm; |
89 | using namespace llvm::memprof; |
90 | using ProfileCount = Function::ProfileCount; |
91 | |
92 | static cl::opt<bool> |
93 | EnableNoAliasConversion("enable-noalias-to-md-conversion" , cl::init(Val: true), |
94 | cl::Hidden, |
95 | cl::desc("Convert noalias attributes to metadata during inlining." )); |
96 | |
97 | static cl::opt<bool> |
98 | UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining" , cl::Hidden, |
99 | cl::init(Val: true), |
100 | cl::desc("Use the llvm.experimental.noalias.scope.decl " |
101 | "intrinsic during inlining." )); |
102 | |
103 | // Disabled by default, because the added alignment assumptions may increase |
104 | // compile-time and block optimizations. This option is not suitable for use |
105 | // with frontends that emit comprehensive parameter alignment annotations. |
106 | static cl::opt<bool> |
107 | PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining" , |
108 | cl::init(Val: false), cl::Hidden, |
109 | cl::desc("Convert align attributes to assumptions during inlining." )); |
110 | |
111 | static cl::opt<unsigned> InlinerAttributeWindow( |
112 | "max-inst-checked-for-throw-during-inlining" , cl::Hidden, |
113 | cl::desc("the maximum number of instructions analyzed for may throw during " |
114 | "attribute inference in inlined body" ), |
115 | cl::init(Val: 4)); |
116 | |
117 | namespace { |
118 | |
119 | /// A class for recording information about inlining a landing pad. |
120 | class LandingPadInliningInfo { |
121 | /// Destination of the invoke's unwind. |
122 | BasicBlock *OuterResumeDest; |
123 | |
124 | /// Destination for the callee's resume. |
125 | BasicBlock *InnerResumeDest = nullptr; |
126 | |
127 | /// LandingPadInst associated with the invoke. |
128 | LandingPadInst *CallerLPad = nullptr; |
129 | |
130 | /// PHI for EH values from landingpad insts. |
131 | PHINode *InnerEHValuesPHI = nullptr; |
132 | |
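/// Incoming values (one per PHI node in OuterResumeDest) that arrived along
/// the original invoke's edge; used to add matching incoming values for new
/// predecessors of the unwind destination created during inlining.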
133 | SmallVector<Value*, 8> UnwindDestPHIValues; |
134 | |
135 | public: |
136 | LandingPadInliningInfo(InvokeInst *II) |
137 | : OuterResumeDest(II->getUnwindDest()) { |
138 | // If there are PHI nodes in the unwind destination block, we need to keep |
139 | // track of which values came into them from the invoke before removing |
140 | // the edge from this block. |
141 | BasicBlock *InvokeBB = II->getParent(); |
142 | BasicBlock::iterator I = OuterResumeDest->begin(); |
143 | for (; isa<PHINode>(Val: I); ++I) { |
144 | // Save the value to use for this edge. |
145 | PHINode *PHI = cast<PHINode>(Val&: I); |
146 | UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB)); |
147 | } |
148 | |
149 | CallerLPad = cast<LandingPadInst>(Val&: I); |
150 | } |
151 | |
152 | /// The outer unwind destination is the target of |
153 | /// unwind edges introduced for calls within the inlined function. |
154 | BasicBlock *getOuterResumeDest() const { |
155 | return OuterResumeDest; |
156 | } |
157 | |
158 | BasicBlock *getInnerResumeDest(); |
159 | |
160 | LandingPadInst *getLandingPadInst() const { return CallerLPad; } |
161 | |
162 | /// Forward the 'resume' instruction to the caller's landing pad block. |
163 | /// When the landing pad block has only one predecessor, this is |
164 | /// a simple branch. When there is more than one predecessor, we need to |
165 | /// split the landing pad block after the landingpad instruction and jump |
166 | /// to there. |
167 | void forwardResume(ResumeInst *RI, |
168 | SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); |
169 | |
170 | /// Add incoming-PHI values to the unwind destination block for the given |
171 | /// basic block, using the values for the original invoke's source block. |
172 | void addIncomingPHIValuesFor(BasicBlock *BB) const { |
173 | addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest); |
174 | } |
175 | |
176 | void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { |
177 | BasicBlock::iterator I = dest->begin(); |
178 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
179 | PHINode *phi = cast<PHINode>(Val&: I); |
180 | phi->addIncoming(V: UnwindDestPHIValues[i], BB: src); |
181 | } |
182 | } |
183 | }; |
184 | } // end anonymous namespace |
185 | |
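/// Return the llvm.experimental.convergence.entry intrinsic call in BB, if
/// the block contains one; otherwise return nullptr.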
186 | static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) { |
187 | BasicBlock::iterator It = BB.getFirstNonPHIIt(); |
188 | while (It != BB.end()) { |
189 | if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) { |
190 | if (IntrinsicCall->isEntry()) { |
191 | return IntrinsicCall; |
192 | } |
193 | } |
194 | It = std::next(x: It); |
195 | } |
196 | return nullptr; |
197 | } |
198 | |
199 | /// Get or create a target for the branch from ResumeInsts. |
200 | BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { |
201 | if (InnerResumeDest) return InnerResumeDest; |
202 | |
203 | // Split the landing pad. |
204 | BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); |
205 | InnerResumeDest = |
206 | OuterResumeDest->splitBasicBlock(I: SplitPoint, |
207 | BBName: OuterResumeDest->getName() + ".body" ); |
208 | |
209 | // The number of incoming edges we expect to the inner landing pad. |
210 | const unsigned PHICapacity = 2; |
211 | |
212 | // Create corresponding new PHIs for all the PHIs in the outer landing pad. |
213 | BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); |
214 | BasicBlock::iterator I = OuterResumeDest->begin(); |
215 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
216 | PHINode *OuterPHI = cast<PHINode>(Val&: I); |
217 | PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity, |
218 | NameStr: OuterPHI->getName() + ".lpad-body" ); |
219 | InnerPHI->insertBefore(InsertPos: InsertPoint); |
220 | OuterPHI->replaceAllUsesWith(V: InnerPHI); |
221 | InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest); |
222 | } |
223 | |
224 | // Create a PHI for the exception values. |
225 | InnerEHValuesPHI = |
226 | PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body" ); |
227 | InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint); |
228 | CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI); |
229 | InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest); |
230 | |
231 | // All done. |
232 | return InnerResumeDest; |
233 | } |
234 | |
235 | /// Forward the 'resume' instruction to the caller's landing pad block. |
236 | /// When the landing pad block has only one predecessor, this is a simple |
237 | /// branch. When there is more than one predecessor, we need to split the |
238 | /// landing pad block after the landingpad instruction and jump to there. |
239 | void LandingPadInliningInfo::forwardResume( |
240 | ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { |
241 | BasicBlock *Dest = getInnerResumeDest(); |
242 | BasicBlock *Src = RI->getParent(); |
243 | |
244 | auto *BI = BranchInst::Create(IfTrue: Dest, InsertBefore: Src); |
245 | BI->setDebugLoc(RI->getDebugLoc()); |
246 | |
247 | // Update the PHIs in the destination. They were inserted in an order which |
248 | // makes this work. |
249 | addIncomingPHIValuesForInto(src: Src, dest: Dest); |
250 | |
251 | InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src); |
252 | RI->eraseFromParent(); |
253 | } |
254 | |
255 | /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. |
256 | static Value *getParentPad(Value *EHPad) { |
257 | if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad)) |
258 | return FPI->getParentPad(); |
259 | return cast<CatchSwitchInst>(Val: EHPad)->getParentPad(); |
260 | } |
261 | |
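/// Memo map used by getUnwindDestToken: maps an EH pad to the token of its
/// resolved unwind destination, or to nullptr if no information was found.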
262 | using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; |
263 | |
264 | /// Helper for getUnwindDestToken that does the descendant-ward part of |
265 | /// the search. |
266 | static Value *getUnwindDestTokenHelper(Instruction *EHPad, |
267 | UnwindDestMemoTy &MemoMap) { |
268 | SmallVector<Instruction *, 8> Worklist(1, EHPad); |
269 | |
270 | while (!Worklist.empty()) { |
271 | Instruction *CurrentPad = Worklist.pop_back_val(); |
272 | // We only put pads on the worklist that aren't in the MemoMap. When |
273 | // we find an unwind dest for a pad we may update its ancestors, but |
274 | // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, |
275 | // so they should never get updated while queued on the worklist. |
276 | assert(!MemoMap.count(CurrentPad)); |
277 | Value *UnwindDestToken = nullptr; |
278 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) { |
279 | if (CatchSwitch->hasUnwindDest()) { |
280 | UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt(); |
281 | } else { |
282 | // Catchswitch doesn't have a 'nounwind' variant, and one might be |
283 | // annotated as "unwinds to caller" when really it's nounwind (see |
284 | // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the |
285 | // parent's unwind dest from this. We can check its catchpads' |
286 | // descendants, since they might include a cleanuppad with an |
287 | // "unwinds to caller" cleanupret, which can be trusted. |
288 | for (auto HI = CatchSwitch->handler_begin(), |
289 | HE = CatchSwitch->handler_end(); |
290 | HI != HE && !UnwindDestToken; ++HI) { |
291 | BasicBlock *HandlerBlock = *HI; |
292 | auto *CatchPad = |
293 | cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt()); |
294 | for (User *Child : CatchPad->users()) { |
295 | // Intentionally ignore invokes here -- since the catchswitch is |
296 | // marked "unwind to caller", it would be a verifier error if it |
297 | // contained an invoke which unwinds out of it, so any invoke we'd |
298 | // encounter must unwind to some child of the catch. |
299 | if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child)) |
300 | continue; |
301 | |
302 | Instruction *ChildPad = cast<Instruction>(Val: Child); |
303 | auto Memo = MemoMap.find(Val: ChildPad); |
304 | if (Memo == MemoMap.end()) { |
305 | // Haven't figured out this child pad yet; queue it. |
306 | Worklist.push_back(Elt: ChildPad); |
307 | continue; |
308 | } |
309 | // We've already checked this child, but might have found that |
310 | // it offers no proof either way. |
311 | Value *ChildUnwindDestToken = Memo->second; |
312 | if (!ChildUnwindDestToken) |
313 | continue; |
314 | // We already know the child's unwind dest, which can either |
315 | // be ConstantTokenNone to indicate unwind to caller, or can |
316 | // be another child of the catchpad. Only the former indicates |
317 | // the unwind dest of the catchswitch. |
318 | if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) { |
319 | UnwindDestToken = ChildUnwindDestToken; |
320 | break; |
321 | } |
322 | assert(getParentPad(ChildUnwindDestToken) == CatchPad); |
323 | } |
324 | } |
325 | } |
326 | } else { |
327 | auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad); |
328 | for (User *U : CleanupPad->users()) { |
329 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) { |
330 | if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) |
331 | UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt(); |
332 | else |
333 | UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext()); |
334 | break; |
335 | } |
336 | Value *ChildUnwindDestToken; |
337 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) { |
338 | ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt(); |
339 | } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) { |
340 | Instruction *ChildPad = cast<Instruction>(Val: U); |
341 | auto Memo = MemoMap.find(Val: ChildPad); |
342 | if (Memo == MemoMap.end()) { |
343 | // Haven't resolved this child yet; queue it and keep searching. |
344 | Worklist.push_back(Elt: ChildPad); |
345 | continue; |
346 | } |
347 | // We've checked this child, but still need to ignore it if it |
348 | // had no proof either way. |
349 | ChildUnwindDestToken = Memo->second; |
350 | if (!ChildUnwindDestToken) |
351 | continue; |
352 | } else { |
353 | // Not a relevant user of the cleanuppad |
354 | continue; |
355 | } |
356 | // In a well-formed program, the child/invoke must either unwind to |
357 | // an(other) child of the cleanup, or exit the cleanup. In the |
358 | // first case, continue searching. |
359 | if (isa<Instruction>(Val: ChildUnwindDestToken) && |
360 | getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad) |
361 | continue; |
362 | UnwindDestToken = ChildUnwindDestToken; |
363 | break; |
364 | } |
365 | } |
366 | // If we haven't found an unwind dest for CurrentPad, we may have queued its |
367 | // children, so move on to the next in the worklist. |
368 | if (!UnwindDestToken) |
369 | continue; |
370 | |
371 | // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits |
372 | // any ancestors of CurrentPad up to but not including UnwindDestToken's |
373 | // parent pad. Record this in the memo map, and check to see if the |
374 | // original EHPad being queried is one of the ones exited. |
375 | Value *UnwindParent; |
376 | if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken)) |
377 | UnwindParent = getParentPad(EHPad: UnwindPad); |
378 | else |
379 | UnwindParent = nullptr; |
380 | bool ExitedOriginalPad = false; |
381 | for (Instruction *ExitedPad = CurrentPad; |
382 | ExitedPad && ExitedPad != UnwindParent; |
383 | ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) { |
384 | // Skip over catchpads since they just follow their catchswitches. |
385 | if (isa<CatchPadInst>(Val: ExitedPad)) |
386 | continue; |
387 | MemoMap[ExitedPad] = UnwindDestToken; |
388 | ExitedOriginalPad |= (ExitedPad == EHPad); |
389 | } |
390 | |
391 | if (ExitedOriginalPad) |
392 | return UnwindDestToken; |
393 | |
394 | // Continue the search. |
395 | } |
396 | |
397 | // No definitive information is contained within this funclet. |
398 | return nullptr; |
399 | } |
400 | |
401 | /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, |
402 | /// return that pad instruction. If it unwinds to caller, return |
403 | /// ConstantTokenNone. If it does not have a definitive unwind destination, |
404 | /// return nullptr. |
405 | /// |
406 | /// This routine gets invoked for calls in funclets in inlinees when inlining |
407 | /// an invoke. Since many funclets don't have calls inside them, it's queried |
408 | /// on-demand rather than building a map of pads to unwind dests up front. |
409 | /// Determining a funclet's unwind dest may require recursively searching its |
410 | /// descendants, and also ancestors and cousins if the descendants don't provide |
411 | /// an answer. Since most funclets will have their unwind dest immediately |
412 | /// available as the unwind dest of a catchswitch or cleanupret, this routine |
413 | /// searches top-down from the given pad and then up. To avoid worst-case |
414 | /// quadratic run-time given that approach, it uses a memo map to avoid |
415 | /// re-processing funclet trees. The callers that rewrite the IR as they go |
416 | /// take advantage of this, for correctness, by checking/forcing rewritten |
417 | /// pads' entries to match the original callee view. |
418 | static Value *getUnwindDestToken(Instruction *EHPad, |
419 | UnwindDestMemoTy &MemoMap) { |
420 | // Catchpads unwind to the same place as their catchswitch; |
// redirect any queries on catchpads so the code below can
422 | // deal with just catchswitches and cleanuppads. |
423 | if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad)) |
424 | EHPad = CPI->getCatchSwitch(); |
425 | |
426 | // Check if we've already determined the unwind dest for this pad. |
427 | auto Memo = MemoMap.find(Val: EHPad); |
428 | if (Memo != MemoMap.end()) |
429 | return Memo->second; |
430 | |
431 | // Search EHPad and, if necessary, its descendants. |
432 | Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); |
433 | assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); |
434 | if (UnwindDestToken) |
435 | return UnwindDestToken; |
436 | |
437 | // No information is available for this EHPad from itself or any of its |
438 | // descendants. An unwind all the way out to a pad in the caller would |
439 | // need also to agree with the unwind dest of the parent funclet, so |
440 | // search up the chain to try to find a funclet with information. Put |
441 | // null entries in the memo map to avoid re-processing as we go up. |
442 | MemoMap[EHPad] = nullptr; |
443 | #ifndef NDEBUG |
444 | SmallPtrSet<Instruction *, 4> TempMemos; |
445 | TempMemos.insert(EHPad); |
446 | #endif |
447 | Instruction *LastUselessPad = EHPad; |
448 | Value *AncestorToken; |
449 | for (AncestorToken = getParentPad(EHPad); |
450 | auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken); |
451 | AncestorToken = getParentPad(EHPad: AncestorToken)) { |
452 | // Skip over catchpads since they just follow their catchswitches. |
453 | if (isa<CatchPadInst>(Val: AncestorPad)) |
454 | continue; |
455 | // If the MemoMap had an entry mapping AncestorPad to nullptr, since we |
456 | // haven't yet called getUnwindDestTokenHelper for AncestorPad in this |
457 | // call to getUnwindDestToken, that would mean that AncestorPad had no |
458 | // information in itself, its descendants, or its ancestors. If that |
459 | // were the case, then we should also have recorded the lack of information |
460 | // for the descendant that we're coming from. So assert that we don't |
461 | // find a null entry in the MemoMap for AncestorPad. |
462 | assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); |
463 | auto AncestorMemo = MemoMap.find(Val: AncestorPad); |
464 | if (AncestorMemo == MemoMap.end()) { |
465 | UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap); |
466 | } else { |
467 | UnwindDestToken = AncestorMemo->second; |
468 | } |
469 | if (UnwindDestToken) |
470 | break; |
471 | LastUselessPad = AncestorPad; |
472 | MemoMap[LastUselessPad] = nullptr; |
473 | #ifndef NDEBUG |
474 | TempMemos.insert(LastUselessPad); |
475 | #endif |
476 | } |
477 | |
478 | // We know that getUnwindDestTokenHelper was called on LastUselessPad and |
479 | // returned nullptr (and likewise for EHPad and any of its ancestors up to |
480 | // LastUselessPad), so LastUselessPad has no information from below. Since |
481 | // getUnwindDestTokenHelper must investigate all downward paths through |
482 | // no-information nodes to prove that a node has no information like this, |
483 | // and since any time it finds information it records it in the MemoMap for |
484 | // not just the immediately-containing funclet but also any ancestors also |
485 | // exited, it must be the case that, walking downward from LastUselessPad, |
486 | // visiting just those nodes which have not been mapped to an unwind dest |
487 | // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since |
488 | // they are just used to keep getUnwindDestTokenHelper from repeating work), |
489 | // any node visited must have been exhaustively searched with no information |
490 | // for it found. |
491 | SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); |
492 | while (!Worklist.empty()) { |
493 | Instruction *UselessPad = Worklist.pop_back_val(); |
494 | auto Memo = MemoMap.find(Val: UselessPad); |
495 | if (Memo != MemoMap.end() && Memo->second) { |
496 | // Here the name 'UselessPad' is a bit of a misnomer, because we've found |
497 | // that it is a funclet that does have information about unwinding to |
498 | // a particular destination; its parent was a useless pad. |
499 | // Since its parent has no information, the unwind edge must not escape |
500 | // the parent, and must target a sibling of this pad. This local unwind |
501 | // gives us no information about EHPad. Leave it and the subtree rooted |
502 | // at it alone. |
503 | assert(getParentPad(Memo->second) == getParentPad(UselessPad)); |
504 | continue; |
505 | } |
// We know we don't have information for UselessPad. If it has an entry in
507 | // the MemoMap (mapping it to nullptr), it must be one of the TempMemos |
508 | // added on this invocation of getUnwindDestToken; if a previous invocation |
509 | // recorded nullptr, it would have had to prove that the ancestors of |
510 | // UselessPad, which include LastUselessPad, had no information, and that |
511 | // in turn would have required proving that the descendants of |
// LastUselessPad, which include EHPad, have no information about
513 | // LastUselessPad, which would imply that EHPad was mapped to nullptr in |
514 | // the MemoMap on that invocation, which isn't the case if we got here. |
515 | assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); |
516 | // Assert as we enumerate users that 'UselessPad' doesn't have any unwind |
517 | // information that we'd be contradicting by making a map entry for it |
518 | // (which is something that getUnwindDestTokenHelper must have proved for |
// us to get here). Just assert on its direct users here; the checks in
520 | // this downward walk at its descendants will verify that they don't have |
521 | // any unwind edges that exit 'UselessPad' either (i.e. they either have no |
522 | // unwind edges or unwind to a sibling). |
523 | MemoMap[UselessPad] = UnwindDestToken; |
524 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) { |
525 | assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad" ); |
526 | for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { |
527 | auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt(); |
528 | for (User *U : CatchPad->users()) { |
529 | assert((!isa<InvokeInst>(U) || |
530 | (getParentPad(&*cast<InvokeInst>(U) |
531 | ->getUnwindDest() |
532 | ->getFirstNonPHIIt()) == CatchPad)) && |
533 | "Expected useless pad" ); |
534 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
535 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
536 | } |
537 | } |
538 | } else { |
539 | assert(isa<CleanupPadInst>(UselessPad)); |
540 | for (User *U : UselessPad->users()) { |
541 | assert(!isa<CleanupReturnInst>(U) && "Expected useless pad" ); |
542 | assert( |
543 | (!isa<InvokeInst>(U) || |
544 | (getParentPad( |
545 | &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) == |
546 | UselessPad)) && |
547 | "Expected useless pad" ); |
548 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
549 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
550 | } |
551 | } |
552 | } |
553 | |
554 | return UnwindDestToken; |
555 | } |
556 | |
557 | /// When we inline a basic block into an invoke, |
558 | /// we have to turn all of the calls that can throw into invokes. |
/// This function analyzes BB to see if there are any such calls, and if so,
/// it rewrites them to be invokes that unwind to UnwindEdge. The caller is
/// responsible for updating the PHI nodes in the unwind destination for the
/// newly introduced predecessor block.
562 | static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( |
563 | BasicBlock *BB, BasicBlock *UnwindEdge, |
564 | UnwindDestMemoTy *FuncletUnwindMap = nullptr) { |
565 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
566 | // We only need to check for function calls: inlined invoke |
567 | // instructions require no special handling. |
568 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
569 | |
570 | if (!CI || CI->doesNotThrow()) |
571 | continue; |
572 | |
573 | // We do not need to (and in fact, cannot) convert possibly throwing calls |
// to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
575 | // invokes. The caller's "segment" of the deoptimization continuation |
// attached to the newly inlined @llvm.experimental.deoptimize
577 | // (resp. @llvm.experimental.guard) call should contain the exception |
578 | // handling logic, if any. |
579 | if (auto *F = CI->getCalledFunction()) |
580 | if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || |
581 | F->getIntrinsicID() == Intrinsic::experimental_guard) |
582 | continue; |
583 | |
584 | if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
585 | // This call is nested inside a funclet. If that funclet has an unwind |
586 | // destination within the inlinee, then unwinding out of this call would |
587 | // be UB. Rewriting this call to an invoke which targets the inlined |
588 | // invoke's unwind dest would give the call's parent funclet multiple |
589 | // unwind destinations, which is something that subsequent EH table |
// generation can't handle and that the verifier rejects. So when we
591 | // see such a call, leave it as a call. |
592 | auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]); |
593 | Value *UnwindDestToken = |
594 | getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap); |
595 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
596 | continue; |
597 | #ifndef NDEBUG |
598 | Instruction *MemoKey; |
599 | if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) |
600 | MemoKey = CatchPad->getCatchSwitch(); |
601 | else |
602 | MemoKey = FuncletPad; |
603 | assert(FuncletUnwindMap->count(MemoKey) && |
604 | (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && |
605 | "must get memoized to avoid confusing later searches" ); |
606 | #endif // NDEBUG |
607 | } |
608 | |
609 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); |
610 | return BB; |
611 | } |
612 | return nullptr; |
613 | } |
614 | |
615 | /// If we inlined an invoke site, we need to convert calls |
616 | /// in the body of the inlined function into invokes. |
617 | /// |
618 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
619 | /// block of the inlined code (the last block is the end of the function), |
620 | /// and InlineCodeInfo is information about the code that got inlined. |
621 | static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
622 | ClonedCodeInfo &InlinedCodeInfo) { |
623 | BasicBlock *InvokeDest = II->getUnwindDest(); |
624 | |
625 | Function *Caller = FirstNewBlock->getParent(); |
626 | |
627 | // The inlined code is currently at the end of the function, scan from the |
628 | // start of the inlined code to its end, checking for stuff we need to |
629 | // rewrite. |
630 | LandingPadInliningInfo Invoke(II); |
631 | |
632 | // Get all of the inlined landing pad instructions. |
633 | SmallPtrSet<LandingPadInst*, 16> InlinedLPads; |
634 | for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); |
635 | I != E; ++I) |
636 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator())) |
637 | InlinedLPads.insert(Ptr: II->getLandingPadInst()); |
638 | |
639 | // Append the clauses from the outer landing pad instruction into the inlined |
640 | // landing pad instructions. |
641 | LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); |
642 | for (LandingPadInst *InlinedLPad : InlinedLPads) { |
643 | unsigned OuterNum = OuterLPad->getNumClauses(); |
644 | InlinedLPad->reserveClauses(Size: OuterNum); |
645 | for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) |
646 | InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx)); |
647 | if (OuterLPad->isCleanup()) |
648 | InlinedLPad->setCleanup(true); |
649 | } |
650 | |
651 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
652 | BB != E; ++BB) { |
653 | if (InlinedCodeInfo.ContainsCalls) |
654 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
655 | BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest())) |
656 | // Update any PHI nodes in the exceptional block to indicate that there |
657 | // is now a new entry in them. |
658 | Invoke.addIncomingPHIValuesFor(BB: NewBB); |
659 | |
660 | // Forward any resumes that are remaining here. |
661 | if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator())) |
662 | Invoke.forwardResume(RI, InlinedLPads); |
663 | } |
664 | |
665 | // Now that everything is happy, we have one final detail. The PHI nodes in |
666 | // the exception destination block still have entries due to the original |
667 | // invoke instruction. Eliminate these entries (which might even delete the |
668 | // PHI node) now. |
669 | InvokeDest->removePredecessor(Pred: II->getParent()); |
670 | } |
671 | |
672 | /// If we inlined an invoke site, we need to convert calls |
673 | /// in the body of the inlined function into invokes. |
674 | /// |
675 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
676 | /// block of the inlined code (the last block is the end of the function), |
677 | /// and InlineCodeInfo is information about the code that got inlined. |
678 | static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
679 | ClonedCodeInfo &InlinedCodeInfo) { |
680 | BasicBlock *UnwindDest = II->getUnwindDest(); |
681 | Function *Caller = FirstNewBlock->getParent(); |
682 | |
683 | assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!" ); |
684 | |
685 | // If there are PHI nodes in the unwind destination block, we need to keep |
686 | // track of which values came into them from the invoke before removing the |
687 | // edge from this block. |
688 | SmallVector<Value *, 8> UnwindDestPHIValues; |
689 | BasicBlock *InvokeBB = II->getParent(); |
690 | for (PHINode &PHI : UnwindDest->phis()) { |
691 | // Save the value to use for this edge. |
692 | UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB)); |
693 | } |
694 | |
695 | // Add incoming-PHI values to the unwind destination block for the given basic |
696 | // block, using the values for the original invoke's source block. |
697 | auto UpdatePHINodes = [&](BasicBlock *Src) { |
698 | BasicBlock::iterator I = UnwindDest->begin(); |
699 | for (Value *V : UnwindDestPHIValues) { |
700 | PHINode *PHI = cast<PHINode>(Val&: I); |
701 | PHI->addIncoming(V, BB: Src); |
702 | ++I; |
703 | } |
704 | }; |
705 | |
706 | // This connects all the instructions which 'unwind to caller' to the invoke |
707 | // destination. |
708 | UnwindDestMemoTy FuncletUnwindMap; |
709 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
710 | BB != E; ++BB) { |
711 | if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) { |
712 | if (CRI->unwindsToCaller()) { |
713 | auto *CleanupPad = CRI->getCleanupPad(); |
714 | CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator()); |
715 | CRI->eraseFromParent(); |
716 | UpdatePHINodes(&*BB); |
717 | // Finding a cleanupret with an unwind destination would confuse |
718 | // subsequent calls to getUnwindDestToken, so map the cleanuppad |
719 | // to short-circuit any such calls and recognize this as an "unwind |
720 | // to caller" cleanup. |
721 | assert(!FuncletUnwindMap.count(CleanupPad) || |
722 | isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); |
723 | FuncletUnwindMap[CleanupPad] = |
724 | ConstantTokenNone::get(Context&: Caller->getContext()); |
725 | } |
726 | } |
727 | |
728 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
729 | if (!I->isEHPad()) |
730 | continue; |
731 | |
732 | Instruction *Replacement = nullptr; |
733 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
734 | if (CatchSwitch->unwindsToCaller()) { |
735 | Value *UnwindDestToken; |
736 | if (auto *ParentPad = |
737 | dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) { |
738 | // This catchswitch is nested inside another funclet. If that |
739 | // funclet has an unwind destination within the inlinee, then |
740 | // unwinding out of this catchswitch would be UB. Rewriting this |
741 | // catchswitch to unwind to the inlined invoke's unwind dest would |
742 | // give the parent funclet multiple unwind destinations, which is |
743 | // something that subsequent EH table generation can't handle and |
// that the verifier rejects. So when we see such a catchswitch, leave it
// as "unwind to caller".
746 | UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap); |
747 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
748 | continue; |
749 | } else { |
750 | // This catchswitch has no parent to inherit constraints from, and |
751 | // none of its descendants can have an unwind edge that exits it and |
752 | // targets another funclet in the inlinee. It may or may not have a |
753 | // descendant that definitively has an unwind to caller. In either |
754 | // case, we'll have to assume that any unwinds out of it may need to |
755 | // be routed to the caller, so treat it as though it has a definitive |
756 | // unwind to caller. |
757 | UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext()); |
758 | } |
759 | auto *NewCatchSwitch = CatchSwitchInst::Create( |
760 | ParentPad: CatchSwitch->getParentPad(), UnwindDest, |
761 | NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(), |
762 | InsertBefore: CatchSwitch->getIterator()); |
763 | for (BasicBlock *PadBB : CatchSwitch->handlers()) |
764 | NewCatchSwitch->addHandler(Dest: PadBB); |
765 | // Propagate info for the old catchswitch over to the new one in |
766 | // the unwind map. This also serves to short-circuit any subsequent |
767 | // checks for the unwind dest of this catchswitch, which would get |
768 | // confused if they found the outer handler in the callee. |
769 | FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; |
770 | Replacement = NewCatchSwitch; |
771 | } |
772 | } else if (!isa<FuncletPadInst>(Val: I)) { |
773 | llvm_unreachable("unexpected EHPad!" ); |
774 | } |
775 | |
776 | if (Replacement) { |
777 | Replacement->takeName(V: &*I); |
778 | I->replaceAllUsesWith(V: Replacement); |
779 | I->eraseFromParent(); |
780 | UpdatePHINodes(&*BB); |
781 | } |
782 | } |
783 | |
784 | if (InlinedCodeInfo.ContainsCalls) |
785 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
786 | E = Caller->end(); |
787 | BB != E; ++BB) |
788 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
789 | BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap)) |
790 | // Update any PHI nodes in the exceptional block to indicate that there |
791 | // is now a new entry in them. |
792 | UpdatePHINodes(NewBB); |
793 | |
794 | // Now that everything is happy, we have one final detail. The PHI nodes in |
795 | // the exception destination block still have entries due to the original |
796 | // invoke instruction. Eliminate these entries (which might even delete the |
797 | // PHI node) now. |
798 | UnwindDest->removePredecessor(Pred: InvokeBB); |
799 | } |
800 | |
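// Return true if the MIB stack context and the callsite stack context agree
// on every stack id, compared up to the length of the shorter of the two.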
801 | static bool haveCommonPrefix(MDNode *MIBStackContext, |
802 | MDNode *CallsiteStackContext) { |
803 | assert(MIBStackContext->getNumOperands() > 0 && |
804 | CallsiteStackContext->getNumOperands() > 0); |
805 | // Because of the context trimming performed during matching, the callsite |
806 | // context could have more stack ids than the MIB. We match up to the end of |
807 | // the shortest stack context. |
808 | for (auto MIBStackIter = MIBStackContext->op_begin(), |
809 | CallsiteStackIter = CallsiteStackContext->op_begin(); |
810 | MIBStackIter != MIBStackContext->op_end() && |
811 | CallsiteStackIter != CallsiteStackContext->op_end(); |
812 | MIBStackIter++, CallsiteStackIter++) { |
813 | auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter); |
814 | auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter); |
815 | assert(Val1 && Val2); |
816 | if (Val1->getZExtValue() != Val2->getZExtValue()) |
817 | return false; |
818 | } |
819 | return true; |
820 | } |
821 | |
822 | static void removeMemProfMetadata(CallBase *Call) { |
823 | Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr); |
824 | } |
825 | |
826 | static void removeCallsiteMetadata(CallBase *Call) { |
827 | Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr); |
828 | } |
829 | |
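// Rebuild the !memprof metadata on CI from the given list of MIB nodes. If
// the rebuilt contexts collapse to a single allocation type, a function
// attribute is used instead and the !callsite metadata is removed as well.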
static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList,
                                  OptimizationRemarkEmitter *ORE) {
833 | assert(!MIBList.empty()); |
834 | // Remove existing memprof, which will either be replaced or may not be needed |
835 | // if we are able to use a single allocation type function attribute. |
836 | removeMemProfMetadata(Call: CI); |
837 | CallStackTrie CallStack(ORE); |
838 | for (Metadata *MIB : MIBList) |
839 | CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB)); |
840 | bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); |
841 | assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); |
842 | if (!MemprofMDAttached) |
843 | // If we used a function attribute remove the callsite metadata as well. |
844 | removeCallsiteMetadata(Call: CI); |
845 | } |
846 | |
847 | // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the |
848 | // inlined callee body, based on the callsite metadata InlinedCallsiteMD from |
849 | // the call that was inlined. |
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD,
                                   OptimizationRemarkEmitter *ORE) {
854 | MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite); |
855 | MDNode *ClonedCallsiteMD = nullptr; |
856 | // Check if the call originally had callsite metadata, and update it for the |
857 | // new call in the inlined body. |
858 | if (OrigCallsiteMD) { |
859 | // The cloned call's context is now the concatenation of the original call's |
860 | // callsite metadata and the callsite metadata on the call where it was |
861 | // inlined. |
862 | ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD); |
863 | ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD); |
864 | } |
865 | |
866 | // Update any memprof metadata on the cloned call. |
867 | MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof); |
868 | if (!OrigMemProfMD) |
869 | return; |
870 | // We currently expect that allocations with memprof metadata also have |
871 | // callsite metadata for the allocation's part of the context. |
872 | assert(OrigCallsiteMD); |
873 | |
874 | // New call's MIB list. |
875 | std::vector<Metadata *> NewMIBList; |
876 | |
877 | // For each MIB metadata, check if its call stack context starts with the |
878 | // new clone's callsite metadata. If so, that MIB goes onto the cloned call in |
879 | // the inlined body. If not, it stays on the out-of-line original call. |
880 | for (auto &MIBOp : OrigMemProfMD->operands()) { |
881 | MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp); |
882 | // Stack is first operand of MIB. |
883 | MDNode *StackMD = getMIBStackNode(MIB); |
884 | assert(StackMD); |
885 | // See if the new cloned callsite context matches this profiled context. |
886 | if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD)) |
887 | // Add it to the cloned call's MIB list. |
888 | NewMIBList.push_back(x: MIB); |
889 | } |
890 | if (NewMIBList.empty()) { |
891 | removeMemProfMetadata(Call: ClonedCall); |
892 | removeCallsiteMetadata(Call: ClonedCall); |
893 | return; |
894 | } |
895 | if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) |
896 | updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE); |
897 | } |
898 | |
899 | // Update memprof related metadata (!memprof and !callsite) based on the |
900 | // inlining of Callee into the callsite at CB. The updates include merging the |
901 | // inlined callee's callsite metadata with that of the inlined call, |
902 | // and moving the subset of any memprof contexts to the inlined callee |
903 | // allocations if they match the new inlined call stack. |
904 | static void |
905 | propagateMemProfMetadata(Function *Callee, CallBase &CB, |
906 | bool ContainsMemProfMetadata, |
907 | const ValueMap<const Value *, WeakTrackingVH> &VMap, |
908 | OptimizationRemarkEmitter *ORE) { |
909 | MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite); |
910 | // Only need to update if the inlined callsite had callsite metadata, or if |
911 | // there was any memprof metadata inlined. |
912 | if (!CallsiteMD && !ContainsMemProfMetadata) |
913 | return; |
914 | |
915 | // Propagate metadata onto the cloned calls in the inlined callee. |
916 | for (const auto &Entry : VMap) { |
917 | // See if this is a call that has been inlined and remapped, and not |
918 | // simplified away in the process. |
919 | auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first); |
920 | auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second); |
921 | if (!OrigCall || !ClonedCall) |
922 | continue; |
923 | // If the inlined callsite did not have any callsite metadata, then it isn't |
924 | // involved in any profiled call contexts, and we can remove any memprof |
925 | // metadata on the cloned call. |
926 | if (!CallsiteMD) { |
927 | removeMemProfMetadata(Call: ClonedCall); |
928 | removeCallsiteMetadata(Call: ClonedCall); |
929 | continue; |
930 | } |
931 | propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE); |
932 | } |
933 | } |
934 | |
935 | /// When inlining a call site that has !llvm.mem.parallel_loop_access, |
936 | /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should |
937 | /// be propagated to all memory-accessing cloned instructions. |
938 | static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, |
939 | Function::iterator FEnd) { |
940 | MDNode *MemParallelLoopAccess = |
941 | CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
942 | MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group); |
943 | MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope); |
944 | MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias); |
945 | if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) |
946 | return; |
947 | |
948 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
949 | for (Instruction &I : BB) { |
950 | // This metadata is only relevant for instructions that access memory. |
951 | if (!I.mayReadOrWriteMemory()) |
952 | continue; |
953 | |
954 | if (MemParallelLoopAccess) { |
// TODO: This probably should not overwrite MemParallelLoopAccess.
956 | MemParallelLoopAccess = MDNode::concatenate( |
957 | A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access), |
958 | B: MemParallelLoopAccess); |
959 | I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, |
960 | Node: MemParallelLoopAccess); |
961 | } |
962 | |
963 | if (AccessGroup) |
964 | I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups( |
965 | AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup)); |
966 | |
967 | if (AliasScope) |
968 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate( |
969 | A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope)); |
970 | |
971 | if (NoAlias) |
972 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate( |
973 | A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias)); |
974 | } |
975 | } |
976 | } |
977 | |
978 | /// Bundle operands of the inlined function must be added to inlined call sites. |
979 | static void PropagateOperandBundles(Function::iterator InlinedBB, |
980 | Instruction *CallSiteEHPad) { |
981 | for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) { |
982 | CallBase *I = dyn_cast<CallBase>(Val: &II); |
983 | if (!I) |
984 | continue; |
985 | // Skip call sites which already have a "funclet" bundle. |
986 | if (I->getOperandBundle(ID: LLVMContext::OB_funclet)) |
987 | continue; |
988 | // Skip call sites which are nounwind intrinsics (as long as they don't |
989 | // lower into regular function calls in the course of IR transformations). |
990 | auto *CalledFn = |
991 | dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts()); |
992 | if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && |
993 | !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID())) |
994 | continue; |
995 | |
996 | SmallVector<OperandBundleDef, 1> OpBundles; |
997 | I->getOperandBundlesAsDefs(Defs&: OpBundles); |
998 | OpBundles.emplace_back(Args: "funclet" , Args&: CallSiteEHPad); |
999 | |
1000 | Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator()); |
1001 | NewInst->takeName(V: I); |
1002 | I->replaceAllUsesWith(V: NewInst); |
1003 | I->eraseFromParent(); |
1004 | } |
1005 | } |
1006 | |
1007 | namespace { |
1008 | /// Utility for cloning !noalias and !alias.scope metadata. When a code region |
1009 | /// using scoped alias metadata is inlined, the aliasing relationships may not |
/// hold between the two versions. It is necessary to create a deep clone of the
1011 | /// metadata, putting the two versions in separate scope domains. |
1012 | class ScopedAliasMetadataDeepCloner { |
1013 | using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; |
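/// All !alias.scope / !noalias metadata nodes used in the function, together
/// with any nodes transitively reachable from them.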
1014 | SetVector<const MDNode *> MD; |
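/// Mapping from each original node to its clone (temporary placeholder nodes
/// until clone() replaces them with the final nodes).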
1015 | MetadataMap MDMap; |
1016 | void addRecursiveMetadataUses(); |
1017 | |
1018 | public: |
1019 | ScopedAliasMetadataDeepCloner(const Function *F); |
1020 | |
1021 | /// Create a new clone of the scoped alias metadata, which will be used by |
1022 | /// subsequent remap() calls. |
1023 | void clone(); |
1024 | |
1025 | /// Remap instructions in the given range from the original to the cloned |
1026 | /// metadata. |
1027 | void remap(Function::iterator FStart, Function::iterator FEnd); |
1028 | }; |
1029 | } // namespace |
1030 | |
1031 | ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( |
1032 | const Function *F) { |
1033 | for (const BasicBlock &BB : *F) { |
1034 | for (const Instruction &I : BB) { |
1035 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
1036 | MD.insert(X: M); |
1037 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
1038 | MD.insert(X: M); |
1039 | |
1040 | // We also need to clone the metadata in noalias intrinsics. |
1041 | if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
1042 | MD.insert(X: Decl->getScopeList()); |
1043 | } |
1044 | } |
1045 | addRecursiveMetadataUses(); |
1046 | } |
1047 | |
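// Add to MD all metadata nodes transitively reachable from the nodes already
// collected, so that operands of scope lists get cloned as well.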
1048 | void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { |
1049 | SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); |
1050 | while (!Queue.empty()) { |
1051 | const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val()); |
1052 | for (const Metadata *Op : M->operands()) |
1053 | if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op)) |
1054 | if (MD.insert(X: OpMD)) |
1055 | Queue.push_back(Elt: OpMD); |
1056 | } |
1057 | } |
1058 | |
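// Clone the collected scope metadata. Temporary placeholder nodes are created
// first so that references between the collected nodes can be remapped
// regardless of visitation order, and are then replaced with the final nodes.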
1059 | void ScopedAliasMetadataDeepCloner::clone() { |
1060 | assert(MDMap.empty() && "clone() already called ?" ); |
1061 | |
1062 | SmallVector<TempMDTuple, 16> DummyNodes; |
1063 | for (const MDNode *I : MD) { |
1064 | DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {})); |
1065 | MDMap[I].reset(MD: DummyNodes.back().get()); |
1066 | } |
1067 | |
1068 | // Create new metadata nodes to replace the dummy nodes, replacing old |
1069 | // metadata references with either a dummy node or an already-created new |
1070 | // node. |
1071 | SmallVector<Metadata *, 4> NewOps; |
1072 | for (const MDNode *I : MD) { |
1073 | for (const Metadata *Op : I->operands()) { |
1074 | if (const MDNode *M = dyn_cast<MDNode>(Val: Op)) |
1075 | NewOps.push_back(Elt: MDMap[M]); |
1076 | else |
1077 | NewOps.push_back(Elt: const_cast<Metadata *>(Op)); |
1078 | } |
1079 | |
1080 | MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps); |
1081 | MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]); |
1082 | assert(TempM->isTemporary() && "Expected temporary node" ); |
1083 | |
1084 | TempM->replaceAllUsesWith(MD: NewM); |
1085 | NewOps.clear(); |
1086 | } |
1087 | } |
1088 | |
1089 | void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, |
1090 | Function::iterator FEnd) { |
1091 | if (MDMap.empty()) |
1092 | return; // Nothing to do. |
1093 | |
1094 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
1095 | for (Instruction &I : BB) { |
1096 | // TODO: The null checks for the MDMap.lookup() results should no longer |
1097 | // be necessary. |
1098 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
1099 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
1100 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew); |
1101 | |
1102 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
1103 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
1104 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew); |
1105 | |
1106 | if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
1107 | if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList())) |
1108 | Decl->setScopeList(MNew); |
1109 | } |
1110 | } |
1111 | } |
1112 | |
1113 | /// If the inlined function has noalias arguments, |
1114 | /// then add new alias scopes for each noalias argument, tag the mapped noalias |
1115 | /// parameters with noalias metadata specifying the new scope, and tag all |
1116 | /// non-derived loads, stores and memory intrinsics with the new alias scopes. |
1117 | static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, |
1118 | const DataLayout &DL, AAResults *CalleeAAR, |
1119 | ClonedCodeInfo &InlinedFunctionInfo) { |
1120 | if (!EnableNoAliasConversion) |
1121 | return; |
1122 | |
1123 | const Function *CalledFunc = CB.getCalledFunction(); |
1124 | SmallVector<const Argument *, 4> NoAliasArgs; |
1125 | |
1126 | for (const Argument &Arg : CalledFunc->args()) |
1127 | if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty()) |
1128 | NoAliasArgs.push_back(Elt: &Arg); |
1129 | |
1130 | if (NoAliasArgs.empty()) |
1131 | return; |
1132 | |
1133 | // To do a good job, if a noalias variable is captured, we need to know if |
1134 | // the capture point dominates the particular use we're considering. |
1135 | DominatorTree DT; |
1136 | DT.recalculate(Func&: const_cast<Function&>(*CalledFunc)); |
1137 | |
1138 | // noalias indicates that pointer values based on the argument do not alias |
1139 | // pointer values which are not based on it. So we add a new "scope" for each |
1140 | // noalias function argument. Accesses using pointers based on that argument |
1141 | // become part of that alias scope, accesses using pointers not based on that |
1142 | // argument are tagged as noalias with that scope. |
1143 | |
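// Map each noalias argument to the alias scope created for it.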
1144 | DenseMap<const Argument *, MDNode *> NewScopes; |
1145 | MDBuilder MDB(CalledFunc->getContext()); |
1146 | |
1147 | // Create a new scope domain for this function. |
1148 | MDNode *NewDomain = |
1149 | MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName()); |
1150 | for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { |
1151 | const Argument *A = NoAliasArgs[i]; |
1152 | |
1153 | std::string Name = std::string(CalledFunc->getName()); |
1154 | if (A->hasName()) { |
1155 | Name += ": %" ; |
1156 | Name += A->getName(); |
1157 | } else { |
1158 | Name += ": argument " ; |
1159 | Name += utostr(X: i); |
1160 | } |
1161 | |
1162 | // Note: We always create a new anonymous root here. This is true regardless |
1163 | // of the linkage of the callee because the aliasing "scope" is not just a |
1164 | // property of the callee, but also all control dependencies in the caller. |
1165 | MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name); |
1166 | NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope)); |
1167 | |
1168 | if (UseNoAliasIntrinsic) { |
1169 | // Introduce a llvm.experimental.noalias.scope.decl for the noalias |
1170 | // argument. |
1171 | MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope); |
1172 | auto *NoAliasDecl = |
1173 | IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList); |
1174 | // Ignore the result for now. The result will be used when the |
1175 | // llvm.noalias intrinsic is introduced. |
1176 | (void)NoAliasDecl; |
1177 | } |
1178 | } |
1179 | |
1180 | // Iterate over all new instructions in the map; for all memory-access |
1181 | // instructions, add the alias scope metadata. |
1182 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
1183 | VMI != VMIE; ++VMI) { |
1184 | if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) { |
1185 | if (!VMI->second) |
1186 | continue; |
1187 | |
1188 | Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second); |
1189 | if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI)) |
1190 | continue; |
1191 | |
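// Collect the pointer operands accessed by this instruction. For calls, also
// note whether the callee is known to only access its argument pointees.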
1192 | bool IsArgMemOnlyCall = false, IsFuncCall = false; |
1193 | SmallVector<const Value *, 2> PtrArgs; |
1194 | |
1195 | if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I)) |
1196 | PtrArgs.push_back(Elt: LI->getPointerOperand()); |
1197 | else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I)) |
1198 | PtrArgs.push_back(Elt: SI->getPointerOperand()); |
1199 | else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I)) |
1200 | PtrArgs.push_back(Elt: VAAI->getPointerOperand()); |
1201 | else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I)) |
1202 | PtrArgs.push_back(Elt: CXI->getPointerOperand()); |
1203 | else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I)) |
1204 | PtrArgs.push_back(Elt: RMWI->getPointerOperand()); |
1205 | else if (const auto *Call = dyn_cast<CallBase>(Val: I)) { |
1206 | // If we know that the call does not access memory, then we'll still |
1207 | // know that about the inlined clone of this call site, and we don't |
1208 | // need to add metadata. |
1209 | if (Call->doesNotAccessMemory()) |
1210 | continue; |
1211 | |
1212 | IsFuncCall = true; |
1213 | if (CalleeAAR) { |
1214 | MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); |
1215 | |
1216 | // We'll retain this knowledge without additional metadata. |
1217 | if (ME.onlyAccessesInaccessibleMem()) |
1218 | continue; |
1219 | |
1220 | if (ME.onlyAccessesArgPointees()) |
1221 | IsArgMemOnlyCall = true; |
1222 | } |
1223 | |
1224 | for (Value *Arg : Call->args()) { |
1225 | // Only care about pointer arguments. If a noalias argument is |
1226 | // accessed through a non-pointer argument, it must be captured |
1227 | // first (e.g. via ptrtoint), and we protect against captures below. |
1228 | if (!Arg->getType()->isPointerTy()) |
1229 | continue; |
1230 | |
1231 | PtrArgs.push_back(Elt: Arg); |
1232 | } |
1233 | } |
1234 | |
1235 | // If we found no pointers, then this instruction is not suitable for |
1236 | // pairing with an instruction to receive aliasing metadata. |
      // However, if this is a call, we might just alias with none of the
      // noalias arguments.
1239 | if (PtrArgs.empty() && !IsFuncCall) |
1240 | continue; |
1241 | |
1242 | // It is possible that there is only one underlying object, but you |
1243 | // need to go through several PHIs to see it, and thus could be |
1244 | // repeated in the Objects list. |
1245 | SmallPtrSet<const Value *, 4> ObjSet; |
1246 | SmallVector<Metadata *, 4> Scopes, NoAliases; |
1247 | |
1248 | for (const Value *V : PtrArgs) { |
1249 | SmallVector<const Value *, 4> Objects; |
1250 | getUnderlyingObjects(V, Objects, /* LI = */ nullptr); |
1251 | |
1252 | ObjSet.insert_range(R&: Objects); |
1253 | } |
1254 | |
1255 | // Figure out if we're derived from anything that is not a noalias |
1256 | // argument. |
1257 | bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, |
1258 | UsesUnknownObject = false; |
1259 | for (const Value *V : ObjSet) { |
1260 | // Is this value a constant that cannot be derived from any pointer |
1261 | // value (we need to exclude constant expressions, for example, that |
1262 | // are formed from arithmetic on global symbols). |
1263 | bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || |
1264 | isa<ConstantPointerNull>(Val: V) || |
1265 | isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V); |
1266 | if (IsNonPtrConst) |
1267 | continue; |
1268 | |
1269 | // If this is anything other than a noalias argument, then we cannot |
1270 | // completely describe the aliasing properties using alias.scope |
1271 | // metadata (and, thus, won't add any). |
1272 | if (const Argument *A = dyn_cast<Argument>(Val: V)) { |
1273 | if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias)) |
1274 | UsesAliasingPtr = true; |
1275 | } else { |
1276 | UsesAliasingPtr = true; |
1277 | } |
1278 | |
1279 | if (isEscapeSource(V)) { |
1280 | // An escape source can only alias with a noalias argument if it has |
1281 | // been captured beforehand. |
1282 | RequiresNoCaptureBefore = true; |
1283 | } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) { |
1284 | // If this is neither an escape source, nor some identified object |
1285 | // (which cannot directly alias a noalias argument), nor some other |
1286 | // argument (which, by definition, also cannot alias a noalias |
1287 | // argument), conservatively do not make any assumptions. |
1288 | UsesUnknownObject = true; |
1289 | } |
1290 | } |
1291 | |
1292 | // Nothing we can do if the used underlying object cannot be reliably |
1293 | // determined. |
1294 | if (UsesUnknownObject) |
1295 | continue; |
1296 | |
1297 | // A function call can always get captured noalias pointers (via other |
1298 | // parameters, globals, etc.). |
1299 | if (IsFuncCall && !IsArgMemOnlyCall) |
1300 | RequiresNoCaptureBefore = true; |
1301 | |
      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
1304 | // 1. The noalias argument is not in the set of objects from which we |
1305 | // definitely derive. |
1306 | // 2. The noalias argument has not yet been captured. |
1307 | // An arbitrary function that might load pointers could see captured |
1308 | // noalias arguments via other noalias arguments or globals, and so we |
1309 | // must always check for prior capture. |
1310 | for (const Argument *A : NoAliasArgs) { |
1311 | if (ObjSet.contains(Ptr: A)) |
1312 | continue; // May be based on a noalias argument. |
1313 | |
1314 | // It might be tempting to skip the PointerMayBeCapturedBefore check if |
1315 | // A->hasNoCaptureAttr() is true, but this is incorrect because |
1316 | // nocapture only guarantees that no copies outlive the function, not |
1317 | // that the value cannot be locally captured. |
1318 | if (!RequiresNoCaptureBefore || |
1319 | !capturesAnything(CC: PointerMayBeCapturedBefore( |
1320 | V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false, |
1321 | Mask: CaptureComponents::Provenance))) |
1322 | NoAliases.push_back(Elt: NewScopes[A]); |
1323 | } |
1324 | |
1325 | if (!NoAliases.empty()) |
1326 | NI->setMetadata(KindID: LLVMContext::MD_noalias, |
1327 | Node: MDNode::concatenate( |
1328 | A: NI->getMetadata(KindID: LLVMContext::MD_noalias), |
1329 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases))); |
1330 | |
1331 | // Next, we want to figure out all of the sets to which we might belong. |
1332 | // We might belong to a set if the noalias argument is in the set of |
1333 | // underlying objects. If there is some non-noalias argument in our list |
1334 | // of underlying objects, then we cannot add a scope because the fact |
1335 | // that some access does not alias with any set of our noalias arguments |
1336 | // cannot itself guarantee that it does not alias with this access |
1337 | // (because there is some pointer of unknown origin involved and the |
1338 | // other access might also depend on this pointer). We also cannot add |
1339 | // scopes to arbitrary functions unless we know they don't access any |
1340 | // non-parameter pointer-values. |
1341 | bool CanAddScopes = !UsesAliasingPtr; |
1342 | if (CanAddScopes && IsFuncCall) |
1343 | CanAddScopes = IsArgMemOnlyCall; |
1344 | |
1345 | if (CanAddScopes) |
1346 | for (const Argument *A : NoAliasArgs) { |
1347 | if (ObjSet.count(Ptr: A)) |
1348 | Scopes.push_back(Elt: NewScopes[A]); |
1349 | } |
1350 | |
1351 | if (!Scopes.empty()) |
1352 | NI->setMetadata( |
1353 | KindID: LLVMContext::MD_alias_scope, |
1354 | Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope), |
1355 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes))); |
1356 | } |
1357 | } |
1358 | } |
1359 | |
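// Return true if some instruction after the call Begin and up to the return
// End (both must be in the same basic block) may throw or otherwise fail to
// transfer execution to its successor. Only a bounded window of instructions
// (InlinerAttributeWindow + 1) is scanned, so the answer is conservative:
// true is also returned when the window is exceeded.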
1360 | static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, |
1361 | ReturnInst *End) { |
1362 | |
  assert(Begin->getParent() == End->getParent() &&
         "Expected to be in same basic block!");
1365 | auto BeginIt = Begin->getIterator(); |
  assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1367 | return !llvm::isGuaranteedToTransferExecutionToSuccessor( |
1368 | Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1); |
1369 | } |
1370 | |
1371 | // Add attributes from CB params and Fn attributes that can always be propagated |
1372 | // to the corresponding argument / inner callbases. |
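// Illustrative example (hypothetical IR): if the call site is roughly
//   call void @callee(ptr nonnull readonly %p)
// and @callee forwards %p unchanged to an inner call, the cloned inner call
// can be given nonnull (exact forwarding) and readonly (same underlying
// object) on the corresponding parameter.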
1373 | static void AddParamAndFnBasicAttributes(const CallBase &CB, |
1374 | ValueToValueMapTy &VMap, |
1375 | ClonedCodeInfo &InlinedFunctionInfo) { |
1376 | auto *CalledFunction = CB.getCalledFunction(); |
1377 | auto &Context = CalledFunction->getContext(); |
1378 | |
1379 | // Collect valid attributes for all params. |
1380 | SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs; |
1381 | bool HasAttrToPropagate = false; |
1382 | |
1383 | // Attributes we can only propagate if the exact parameter is forwarded. |
1384 | // We can propagate both poison generating and UB generating attributes |
1385 | // without any extra checks. The only attribute that is tricky to propagate |
1386 | // is `noundef` (skipped for now) as that can create new UB where previous |
1387 | // behavior was just using a poison value. |
1388 | static const Attribute::AttrKind ExactAttrsToPropagate[] = { |
1389 | Attribute::Dereferenceable, Attribute::DereferenceableOrNull, |
1390 | Attribute::NonNull, Attribute::NoFPClass, |
1391 | Attribute::Alignment, Attribute::Range}; |
1392 | |
1393 | for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) { |
1394 | ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
1395 | ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
1396 | // Access attributes can be propagated to any param with the same underlying |
1397 | // object as the argument. |
1398 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
1399 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone); |
1400 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly)) |
1401 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly); |
1402 | |
1403 | for (Attribute::AttrKind AK : ExactAttrsToPropagate) { |
1404 | Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK); |
1405 | if (Attr.isValid()) |
1406 | ValidExactParamAttrs.back().addAttribute(A: Attr); |
1407 | } |
1408 | |
1409 | HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes(); |
1410 | HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes(); |
1411 | } |
1412 | |
1413 | // Won't be able to propagate anything. |
1414 | if (!HasAttrToPropagate) |
1415 | return; |
1416 | |
1417 | for (BasicBlock &BB : *CalledFunction) { |
1418 | for (Instruction &Ins : BB) { |
1419 | const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins); |
1420 | if (!InnerCB) |
1421 | continue; |
1422 | auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB)); |
1423 | if (!NewInnerCB) |
1424 | continue; |
      // The InnerCB might have been simplified during the inlining
      // process which can make propagation incorrect.
1427 | if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB)) |
1428 | continue; |
1429 | |
1430 | AttributeList AL = NewInnerCB->getAttributes(); |
1431 | for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { |
1432 | // It's unsound or requires special handling to propagate |
1433 | // attributes to byval arguments. Even if CalledFunction |
1434 | // doesn't e.g. write to the argument (readonly), the call to |
1435 | // NewInnerCB may write to its by-value copy. |
1436 | if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal)) |
1437 | continue; |
1438 | |
1439 | // Don't bother propagating attrs to constants. |
1440 | if (match(V: NewInnerCB->getArgOperand(i: I), |
1441 | P: llvm::PatternMatch::m_ImmConstant())) |
1442 | continue; |
1443 | |
1444 | // Check if the underlying value for the parameter is an argument. |
1445 | const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I)); |
1446 | unsigned ArgNo; |
1447 | if (Arg) { |
1448 | ArgNo = Arg->getArgNo(); |
1449 | // For dereferenceable, dereferenceable_or_null, align, etc... |
1450 | // we don't want to propagate if the existing param has the same |
1451 | // attribute with "better" constraints. So remove from the |
1452 | // new AL if the region of the existing param is larger than |
1453 | // what we can propagate. |
1454 | AttrBuilder NewAB{ |
1455 | Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])}; |
1456 | if (AL.getParamDereferenceableBytes(Index: I) > |
1457 | NewAB.getDereferenceableBytes()) |
1458 | NewAB.removeAttribute(Val: Attribute::Dereferenceable); |
1459 | if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) > |
1460 | NewAB.getDereferenceableOrNullBytes()) |
1461 | NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
1462 | if (AL.getParamAlignment(ArgNo: I).valueOrOne() > |
1463 | NewAB.getAlignment().valueOrOne()) |
1464 | NewAB.removeAttribute(Val: Attribute::Alignment); |
1465 | if (auto ExistingRange = AL.getParamRange(ArgNo: I)) { |
1466 | if (auto NewRange = NewAB.getRange()) { |
1467 | ConstantRange CombinedRange = |
1468 | ExistingRange->intersectWith(CR: *NewRange); |
1469 | NewAB.removeAttribute(Val: Attribute::Range); |
1470 | NewAB.addRangeAttr(CR: CombinedRange); |
1471 | } |
1472 | } |
1473 | |
1474 | if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I)) |
1475 | NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass()); |
1476 | |
1477 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB); |
1478 | } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) { |
1479 | // Check if the underlying value for the parameter is an argument. |
1480 | const Value *UnderlyingV = |
1481 | getUnderlyingObject(V: InnerCB->getArgOperand(i: I)); |
1482 | Arg = dyn_cast<Argument>(Val: UnderlyingV); |
1483 | if (!Arg) |
1484 | continue; |
1485 | ArgNo = Arg->getArgNo(); |
1486 | } else { |
1487 | continue; |
1488 | } |
1489 | |
1490 | // If so, propagate its access attributes. |
1491 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]); |
1492 | |
1493 | // We can have conflicting attributes from the inner callsite and |
1494 | // to-be-inlined callsite. In that case, choose the most |
1495 | // restrictive. |
1496 | |
1497 | // readonly + writeonly means we can never deref so make readnone. |
1498 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) && |
1499 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly)) |
1500 | AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone); |
1501 | |
        // If we have readnone, we need to clear readonly/writeonly.
1503 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) { |
1504 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly); |
1505 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly); |
1506 | } |
1507 | |
1508 | // Writable cannot exist in conjunction w/ readonly/readnone |
1509 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) || |
1510 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
1511 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable); |
1512 | } |
1513 | NewInnerCB->setAttributes(AL); |
1514 | } |
1515 | } |
1516 | } |
1517 | |
// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.

// Attributes that are always okay to propagate, as violating them results in
// immediate UB.
1524 | static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { |
1525 | AttrBuilder Valid(CB.getContext()); |
1526 | if (auto DerefBytes = CB.getRetDereferenceableBytes()) |
1527 | Valid.addDereferenceableAttr(Bytes: DerefBytes); |
1528 | if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes()) |
1529 | Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes); |
1530 | if (CB.hasRetAttr(Kind: Attribute::NoAlias)) |
1531 | Valid.addAttribute(Val: Attribute::NoAlias); |
1532 | if (CB.hasRetAttr(Kind: Attribute::NoUndef)) |
1533 | Valid.addAttribute(Val: Attribute::NoUndef); |
1534 | return Valid; |
1535 | } |
1536 | |
1537 | // Attributes that need additional checks as propagating them may change |
1538 | // behavior or cause new UB. |
1539 | static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { |
1540 | AttrBuilder Valid(CB.getContext()); |
1541 | if (CB.hasRetAttr(Kind: Attribute::NonNull)) |
1542 | Valid.addAttribute(Val: Attribute::NonNull); |
1543 | if (CB.hasRetAttr(Kind: Attribute::Alignment)) |
1544 | Valid.addAlignmentAttr(Align: CB.getRetAlign()); |
1545 | if (std::optional<ConstantRange> Range = CB.getRange()) |
1546 | Valid.addRangeAttr(CR: *Range); |
1547 | return Valid; |
1548 | } |
1549 | |
1550 | static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap, |
1551 | ClonedCodeInfo &InlinedFunctionInfo) { |
1552 | AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); |
1553 | AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); |
1554 | if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) |
1555 | return; |
1556 | auto *CalledFunction = CB.getCalledFunction(); |
1557 | auto &Context = CalledFunction->getContext(); |
1558 | |
1559 | for (auto &BB : *CalledFunction) { |
1560 | auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()); |
1561 | if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0))) |
1562 | continue; |
1563 | auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0)); |
1564 | // Check that the cloned RetVal exists and is a call, otherwise we cannot |
1565 | // add the attributes on the cloned RetVal. Simplification during inlining |
1566 | // could have transformed the cloned instruction. |
1567 | auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal)); |
1568 | if (!NewRetVal) |
1569 | continue; |
1570 | |
    // The RetVal might have been simplified during the inlining
    // process which can make propagation incorrect.
1573 | if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal)) |
1574 | continue; |
1575 | // Backward propagation of attributes to the returned value may be incorrect |
1576 | // if it is control flow dependent. |
1577 | // Consider: |
1578 | // @callee { |
1579 | // %rv = call @foo() |
1580 | // %rv2 = call @bar() |
1581 | // if (%rv2 != null) |
1582 | // return %rv2 |
1583 | // if (%rv == null) |
1584 | // exit() |
1585 | // return %rv |
1586 | // } |
1587 | // caller() { |
1588 | // %val = call nonnull @callee() |
1589 | // } |
    // Here we cannot add the nonnull attribute on either foo or bar. So, we
    // limit the check to cases where RetVal and RI are in the same basic block
    // and there are no throwing/exiting instructions between them.
1593 | if (RI->getParent() != RetVal->getParent() || |
1594 | MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI)) |
1595 | continue; |
1596 | // Add to the existing attributes of NewRetVal, i.e. the cloned call |
1597 | // instruction. |
1598 | // NB! When we have the same attribute already existing on NewRetVal, but |
1599 | // with a differing value, the AttributeList's merge API honours the already |
1600 | // existing attribute value (i.e. attributes such as dereferenceable, |
1601 | // dereferenceable_or_null etc). See AttrBuilder::merge for more details. |
1602 | AttributeList AL = NewRetVal->getAttributes(); |
1603 | if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) |
1604 | ValidUB.removeAttribute(Val: Attribute::Dereferenceable); |
1605 | if (ValidUB.getDereferenceableOrNullBytes() < |
1606 | AL.getRetDereferenceableOrNullBytes()) |
1607 | ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
1608 | AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB); |
    // Attributes that may generate poison returns are a bit tricky. If we
    // propagate them, other uses of the callsite might have their behavior
    // change or cause UB (if they have noundef) because of the new potential
    // poison.
1613 | // Take the following three cases: |
1614 | // |
1615 | // 1) |
1616 | // define nonnull ptr @foo() { |
1617 | // %p = call ptr @bar() |
1618 | // call void @use(ptr %p) willreturn nounwind |
1619 | // ret ptr %p |
1620 | // } |
1621 | // |
1622 | // 2) |
1623 | // define noundef nonnull ptr @foo() { |
1624 | // %p = call ptr @bar() |
1625 | // call void @use(ptr %p) willreturn nounwind |
1626 | // ret ptr %p |
1627 | // } |
1628 | // |
1629 | // 3) |
1630 | // define nonnull ptr @foo() { |
1631 | // %p = call noundef ptr @bar() |
1632 | // ret ptr %p |
1633 | // } |
1634 | // |
1635 | // In case 1, we can't propagate nonnull because poison value in @use may |
1636 | // change behavior or trigger UB. |
1637 | // In case 2, we don't need to be concerned about propagating nonnull, as |
1638 | // any new poison at @use will trigger UB anyways. |
1639 | // In case 3, we can never propagate nonnull because it may create UB due to |
1640 | // the noundef on @bar. |
1641 | if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) |
1642 | ValidPG.removeAttribute(Val: Attribute::Alignment); |
1643 | if (ValidPG.hasAttributes()) { |
1644 | Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range); |
1645 | if (CBRange.isValid()) { |
1646 | Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range); |
1647 | if (NewRange.isValid()) { |
1648 | ValidPG.addRangeAttr( |
1649 | CR: CBRange.getRange().intersectWith(CR: NewRange.getRange())); |
1650 | } |
1651 | } |
1652 | // Three checks. |
1653 | // If the callsite has `noundef`, then a poison due to violating the |
1654 | // return attribute will create UB anyways so we can always propagate. |
1655 | // Otherwise, if the return value (callee to be inlined) has `noundef`, we |
1656 | // can't propagate as a new poison return will cause UB. |
1657 | // Finally, check if the return value has no uses whose behavior may |
1658 | // change/may cause UB if we potentially return poison. At the moment this |
1659 | // is implemented overly conservatively with a single-use check. |
1660 | // TODO: Update the single-use check to iterate through uses and only bail |
1661 | // if we have a potentially dangerous use. |
1662 | |
1663 | if (CB.hasRetAttr(Kind: Attribute::NoUndef) || |
1664 | (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef))) |
1665 | NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG); |
1666 | } |
1667 | NewRetVal->setAttributes(NewAL); |
1668 | } |
1669 | } |
1670 | |
1671 | /// If the inlined function has non-byval align arguments, then |
1672 | /// add @llvm.assume-based alignment assumptions to preserve this information. |
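/// For example (illustrative only), for a callee parameter declared
/// "ptr align 16 %p", the assumption inserted at the call site takes the
/// operand-bundle form, e.g.:
///   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]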
1673 | static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { |
1674 | if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) |
1675 | return; |
1676 | |
1677 | AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); |
1678 | auto &DL = CB.getDataLayout(); |
1679 | |
1680 | // To avoid inserting redundant assumptions, we should check for assumptions |
1681 | // already in the caller. To do this, we might need a DT of the caller. |
1682 | DominatorTree DT; |
1683 | bool DTCalculated = false; |
1684 | |
1685 | Function *CalledFunc = CB.getCalledFunction(); |
1686 | for (Argument &Arg : CalledFunc->args()) { |
1687 | if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || |
1688 | Arg.use_empty()) |
1689 | continue; |
1690 | MaybeAlign Alignment = Arg.getParamAlign(); |
1691 | if (!Alignment) |
1692 | continue; |
1693 | |
1694 | if (!DTCalculated) { |
1695 | DT.recalculate(Func&: *CB.getCaller()); |
1696 | DTCalculated = true; |
1697 | } |
1698 | // If we can already prove the asserted alignment in the context of the |
1699 | // caller, then don't bother inserting the assumption. |
1700 | Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo()); |
1701 | if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment) |
1702 | continue; |
1703 | |
1704 | CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( |
1705 | DL, PtrValue: ArgVal, Alignment: Alignment->value()); |
1706 | AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp)); |
1707 | } |
1708 | } |
1709 | |
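/// Emit the explicit memcpy that initializes the temporary created for a
/// byval argument (see HandleByValArgument below): copy
/// getTypeStoreSize(ByValType) bytes from Src into Dst at the start of
/// InsertBlock, and assign the memcpy a dummy debug location when the
/// verifier requires one.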
1710 | static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, |
1711 | MaybeAlign SrcAlign, Module *M, |
1712 | BasicBlock *InsertBlock, |
1713 | InlineFunctionInfo &IFI, |
1714 | Function *CalledFunc) { |
1715 | IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); |
1716 | |
1717 | Value *Size = |
1718 | Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType)); |
1719 | |
1720 | Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout()); |
1721 | |
1722 | // Generate a memcpy with the correct alignments. |
1723 | CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size); |
1724 | |
1725 | // The verifier requires that all calls of debug-info-bearing functions |
1726 | // from debug-info-bearing functions have a debug location (for inlining |
1727 | // purposes). Assign a dummy location to satisfy the constraint. |
1728 | if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram()) |
1729 | if (DISubprogram *SP = CalledFunc->getSubprogram()) |
1730 | CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP)); |
1731 | } |
1732 | |
1733 | /// When inlining a call site that has a byval argument, |
1734 | /// we have to make the implicit memcpy explicit by adding it. |
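/// A rough sketch (illustrative only): for a call such as
///   call void @f(ptr byval(%struct.S) align 8 %src)
/// the inlined body is given a fresh alloca in the caller's entry block and
/// an explicit copy at the call site, e.g.:
///   %src.tmp = alloca %struct.S, align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %src.tmp, ptr %src, i64 <size>, i1 false)
/// unless the callee only reads memory and the alignment already suffices, in
/// which case the copy is elided and the original pointer is reused.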
1735 | static Value *HandleByValArgument(Type *ByValType, Value *Arg, |
1736 | Instruction *TheCall, |
1737 | const Function *CalledFunc, |
1738 | InlineFunctionInfo &IFI, |
1739 | MaybeAlign ByValAlignment) { |
1740 | Function *Caller = TheCall->getFunction(); |
1741 | const DataLayout &DL = Caller->getDataLayout(); |
1742 | |
1743 | // If the called function is readonly, then it could not mutate the caller's |
1744 | // copy of the byval'd memory. In this case, it is safe to elide the copy and |
1745 | // temporary. |
1746 | if (CalledFunc->onlyReadsMemory()) { |
    // If the byval argument has a specified alignment that is greater than
    // the alignment of the passed-in pointer, then we either have to round up
    // the input pointer or give up on this transformation.
1750 | if (ByValAlignment.valueOrOne() == 1) |
1751 | return Arg; |
1752 | |
1753 | AssumptionCache *AC = |
1754 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
1755 | |
1756 | // If the pointer is already known to be sufficiently aligned, or if we can |
1757 | // round it up to a larger alignment, then we don't need a temporary. |
1758 | if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >= |
1759 | *ByValAlignment) |
1760 | return Arg; |
1761 | |
1762 | // Otherwise, we have to make a memcpy to get a safe alignment. This is bad |
1763 | // for code quality, but rarely happens and is required for correctness. |
1764 | } |
1765 | |
1766 | // Create the alloca. If we have DataLayout, use nice alignment. |
1767 | Align Alignment = DL.getPrefTypeAlign(Ty: ByValType); |
1768 | |
1769 | // If the byval had an alignment specified, we *must* use at least that |
1770 | // alignment, as it is required by the byval argument (and uses of the |
1771 | // pointer inside the callee). |
1772 | if (ByValAlignment) |
1773 | Alignment = std::max(a: Alignment, b: *ByValAlignment); |
1774 | |
1775 | AllocaInst *NewAlloca = |
1776 | new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(), |
1777 | nullptr, Alignment, Arg->getName()); |
1778 | NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated()); |
1779 | NewAlloca->insertBefore(InsertPos: Caller->begin()->begin()); |
1780 | IFI.StaticAllocas.push_back(Elt: NewAlloca); |
1781 | |
1782 | // Uses of the argument in the function should use our new alloca |
1783 | // instead. |
1784 | return NewAlloca; |
1785 | } |
1786 | |
1787 | // Check whether this Value is used by a lifetime intrinsic. |
1788 | static bool isUsedByLifetimeMarker(Value *V) { |
1789 | for (User *U : V->users()) |
1790 | if (isa<LifetimeIntrinsic>(Val: U)) |
1791 | return true; |
1792 | return false; |
1793 | } |
1794 | |
1795 | // Check whether the given alloca already has |
1796 | // lifetime.start or lifetime.end intrinsics. |
1797 | static bool hasLifetimeMarkers(AllocaInst *AI) { |
1798 | Type *Ty = AI->getType(); |
1799 | Type *Int8PtrTy = |
1800 | PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace()); |
1801 | if (Ty == Int8PtrTy) |
1802 | return isUsedByLifetimeMarker(V: AI); |
1803 | |
1804 | // Do a scan to find all the casts to i8*. |
1805 | for (User *U : AI->users()) { |
1806 | if (U->getType() != Int8PtrTy) continue; |
1807 | if (U->stripPointerCasts() != AI) continue; |
1808 | if (isUsedByLifetimeMarker(V: U)) |
1809 | return true; |
1810 | } |
1811 | return false; |
1812 | } |
1813 | |
1814 | /// Return the result of AI->isStaticAlloca() if AI were moved to the entry |
1815 | /// block. Allocas used in inalloca calls and allocas of dynamic array size |
1816 | /// cannot be static. |
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1818 | return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca(); |
1819 | } |
1820 | |
1821 | /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL |
1822 | /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. |
1823 | static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, |
1824 | LLVMContext &Ctx, |
1825 | DenseMap<const MDNode *, MDNode *> &IANodes) { |
1826 | auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes); |
1827 | return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(), |
1828 | Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(), |
1829 | AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank()); |
1830 | } |
1831 | |
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
1834 | static void fixupLineNumbers(Function *Fn, Function::iterator FI, |
1835 | Instruction *TheCall, bool CalleeHasDebugInfo) { |
1836 | if (!TheCall->getDebugLoc()) |
1837 | return; |
1838 | |
1839 | // Don't propagate the source location atom from the call to inlined nodebug |
1840 | // instructions, and avoid putting it in the InlinedAt field of inlined |
1841 | // not-nodebug instructions. FIXME: Possibly worth transferring/generating |
1842 | // an atom for the returned value, otherwise we miss stepping on inlined |
1843 | // nodebug functions (which is different to existing behaviour). |
1844 | DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom(); |
1845 | |
1846 | auto &Ctx = Fn->getContext(); |
1847 | DILocation *InlinedAtNode = TheCallDL; |
1848 | |
1849 | // Create a unique call site, not to be confused with any other call from the |
1850 | // same location. |
1851 | InlinedAtNode = DILocation::getDistinct( |
1852 | Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(), |
1853 | Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt()); |
1854 | |
  // Cache the inlined-at nodes as they're built so they are reused; without
  // this, every instruction's inlined-at chain would become distinct from the
  // others.
1858 | DenseMap<const MDNode *, MDNode *> IANodes; |
1859 | |
1860 | // Check if we are not generating inline line tables and want to use |
1861 | // the call site location instead. |
  bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1863 | |
1864 | // Helper-util for updating the metadata attached to an instruction. |
1865 | auto UpdateInst = [&](Instruction &I) { |
1866 | // Loop metadata needs to be updated so that the start and end locs |
1867 | // reference inlined-at locations. |
1868 | auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, |
1869 | &IANodes](Metadata *MD) -> Metadata * { |
1870 | if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD)) |
1871 | return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get(); |
1872 | return MD; |
1873 | }; |
1874 | updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc); |
1875 | |
1876 | if (!NoInlineLineTables) |
1877 | if (DebugLoc DL = I.getDebugLoc()) { |
1878 | DebugLoc IDL = |
1879 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes); |
1880 | I.setDebugLoc(IDL); |
1881 | return; |
1882 | } |
1883 | |
1884 | if (CalleeHasDebugInfo && !NoInlineLineTables) |
1885 | return; |
1886 | |
1887 | // If the inlined instruction has no line number, or if inline info |
1888 | // is not being generated, make it look as if it originates from the call |
1889 | // location. This is important for ((__always_inline, __nodebug__)) |
1890 | // functions which must use caller location for all instructions in their |
1891 | // function body. |
1892 | |
1893 | // Don't update static allocas, as they may get moved later. |
1894 | if (auto *AI = dyn_cast<AllocaInst>(Val: &I)) |
1895 | if (allocaWouldBeStaticInEntry(AI)) |
1896 | return; |
1897 | |
1898 | // Do not force a debug loc for pseudo probes, since they do not need to |
1899 | // be debuggable, and also they are expected to have a zero/null dwarf |
1900 | // discriminator at this point which could be violated otherwise. |
1901 | if (isa<PseudoProbeInst>(Val: I)) |
1902 | return; |
1903 | |
1904 | I.setDebugLoc(TheCallDL); |
1905 | }; |
1906 | |
1907 | // Helper-util for updating debug-info records attached to instructions. |
1908 | auto UpdateDVR = [&](DbgRecord *DVR) { |
    assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1910 | if (NoInlineLineTables) { |
1911 | DVR->setDebugLoc(TheCallDL); |
1912 | return; |
1913 | } |
1914 | DebugLoc DL = DVR->getDebugLoc(); |
1915 | DebugLoc IDL = |
1916 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, |
1917 | Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes); |
1918 | DVR->setDebugLoc(IDL); |
1919 | }; |
1920 | |
1921 | // Iterate over all instructions, updating metadata and debug-info records. |
1922 | for (; FI != Fn->end(); ++FI) { |
1923 | for (Instruction &I : *FI) { |
1924 | UpdateInst(I); |
1925 | for (DbgRecord &DVR : I.getDbgRecordRange()) { |
1926 | UpdateDVR(&DVR); |
1927 | } |
1928 | } |
1929 | |
1930 | // Remove debug info records if we're not keeping inline info. |
1931 | if (NoInlineLineTables) { |
1932 | BasicBlock::iterator BI = FI->begin(); |
1933 | while (BI != FI->end()) { |
1934 | BI->dropDbgRecords(); |
1935 | ++BI; |
1936 | } |
1937 | } |
1938 | } |
1939 | } |
1940 | |
1941 | #undef DEBUG_TYPE |
1942 | #define DEBUG_TYPE "assignment-tracking" |
1943 | /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. |
1944 | static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, |
1945 | const CallBase &CB) { |
1946 | at::StorageToVarsMap EscapedLocals; |
1947 | SmallPtrSet<const Value *, 4> SeenBases; |
1948 | |
  LLVM_DEBUG(
      errs() << "# Finding caller local variables escaped by callee\n");
  for (const Value *Arg : CB.args()) {
    LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
    if (!Arg->getType()->isPointerTy()) {
      LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1955 | continue; |
1956 | } |
1957 | |
1958 | const Instruction *I = dyn_cast<Instruction>(Val: Arg); |
1959 | if (!I) { |
      LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1961 | continue; |
1962 | } |
1963 | |
1964 | // Walk back to the base storage. |
1965 | assert(Arg->getType()->isPtrOrPtrVectorTy()); |
1966 | APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false); |
1967 | const AllocaInst *Base = dyn_cast<AllocaInst>( |
1968 | Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true)); |
1969 | if (!Base) { |
      LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1971 | continue; |
1972 | } |
1973 | |
1974 | assert(Base); |
    LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1976 | // We only need to process each base address once - skip any duplicates. |
1977 | if (!SeenBases.insert(Ptr: Base).second) |
1978 | continue; |
1979 | |
1980 | // Find all local variables associated with the backing storage. |
1981 | auto CollectAssignsForStorage = [&](auto *DbgAssign) { |
1982 | // Skip variables from inlined functions - they are not local variables. |
1983 | if (DbgAssign->getDebugLoc().getInlinedAt()) |
1984 | return; |
      LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1986 | EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign)); |
1987 | }; |
1988 | for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
1989 | for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
1990 | } |
1991 | return EscapedLocals; |
1992 | } |
1993 | |
1994 | static void trackInlinedStores(Function::iterator Start, Function::iterator End, |
1995 | const CallBase &CB) { |
1996 | LLVM_DEBUG(errs() << "trackInlinedStores into " |
1997 | << Start->getParent()->getName() << " from " |
1998 | << CB.getCalledFunction()->getName() << "\n" ); |
1999 | const DataLayout &DL = CB.getDataLayout(); |
2000 | at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL); |
2001 | } |
2002 | |
2003 | /// Update inlined instructions' DIAssignID metadata. We need to do this |
2004 | /// otherwise a function inlined more than once into the same function |
2005 | /// will cause DIAssignID to be shared by many instructions. |
2006 | static void fixupAssignments(Function::iterator Start, Function::iterator End) { |
2007 | DenseMap<DIAssignID *, DIAssignID *> Map; |
2008 | // Loop over all the inlined instructions. If we find a DIAssignID |
2009 | // attachment or use, replace it with a new version. |
2010 | for (auto BBI = Start; BBI != End; ++BBI) { |
2011 | for (Instruction &I : *BBI) |
2012 | at::remapAssignID(Map, I); |
2013 | } |
2014 | } |
2015 | #undef DEBUG_TYPE |
2016 | #define DEBUG_TYPE "inline-function" |
2017 | |
2018 | /// Update the block frequencies of the caller after a callee has been inlined. |
2019 | /// |
2020 | /// Each block cloned into the caller has its block frequency scaled by the |
2021 | /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of |
2022 | /// callee's entry block gets the same frequency as the callsite block and the |
2023 | /// relative frequencies of all cloned blocks remain the same after cloning. |
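/// For example (illustrative numbers): with a call site frequency of 200 and
/// a callee entry frequency of 100, a callee block with frequency 50 is
/// cloned into the caller with frequency 50 * 200 / 100 = 100.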
2024 | static void updateCallerBFI(BasicBlock *CallSiteBlock, |
2025 | const ValueToValueMapTy &VMap, |
2026 | BlockFrequencyInfo *CallerBFI, |
2027 | BlockFrequencyInfo *CalleeBFI, |
2028 | const BasicBlock &CalleeEntryBlock) { |
2029 | SmallPtrSet<BasicBlock *, 16> ClonedBBs; |
2030 | for (auto Entry : VMap) { |
2031 | if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second) |
2032 | continue; |
2033 | auto *OrigBB = cast<BasicBlock>(Val: Entry.first); |
2034 | auto *ClonedBB = cast<BasicBlock>(Val: Entry.second); |
2035 | BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB); |
2036 | if (!ClonedBBs.insert(Ptr: ClonedBB).second) { |
2037 | // Multiple blocks in the callee might get mapped to one cloned block in |
2038 | // the caller since we prune the callee as we clone it. When that happens, |
2039 | // we want to use the maximum among the original blocks' frequencies. |
2040 | BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB); |
2041 | if (NewFreq > Freq) |
2042 | Freq = NewFreq; |
2043 | } |
2044 | CallerBFI->setBlockFreq(BB: ClonedBB, Freq); |
2045 | } |
2046 | BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock)); |
2047 | CallerBFI->setBlockFreqAndScale( |
2048 | ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs); |
2049 | } |
2050 | |
2051 | /// Update the branch metadata for cloned call instructions. |
2052 | static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, |
2053 | const ProfileCount &CalleeEntryCount, |
2054 | const CallBase &TheCall, ProfileSummaryInfo *PSI, |
2055 | BlockFrequencyInfo *CallerBFI) { |
2056 | if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) |
2057 | return; |
2058 | auto CallSiteCount = |
2059 | PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt; |
2060 | int64_t CallCount = |
2061 | std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount()); |
2062 | updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap); |
2063 | } |
2064 | |
2065 | void llvm::updateProfileCallee( |
2066 | Function *Callee, int64_t EntryDelta, |
2067 | const ValueMap<const Value *, WeakTrackingVH> *VMap) { |
2068 | auto CalleeCount = Callee->getEntryCount(); |
2069 | if (!CalleeCount) |
2070 | return; |
2071 | |
2072 | const uint64_t PriorEntryCount = CalleeCount->getCount(); |
2073 | |
  // Since CallSiteCount is an estimate, it could exceed the original callee
  // count; clamp the new entry count to 0 to guard against underflow.
2076 | const uint64_t NewEntryCount = |
2077 | (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) |
2078 | ? 0 |
2079 | : PriorEntryCount + EntryDelta; |
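  // For example (illustrative numbers): with PriorEntryCount = 100, an
  // EntryDelta of -30 yields NewEntryCount = 70, while -150 clamps it to 0.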
2080 | |
2081 | auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, |
2082 | const uint64_t PriorEntryCount) { |
2083 | Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); |
2084 | if (VPtr) |
2085 | scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount); |
2086 | }; |
2087 | |
  // During inlining?
2089 | if (VMap) { |
2090 | uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; |
2091 | for (auto Entry : *VMap) { |
2092 | if (isa<CallInst>(Val: Entry.first)) |
2093 | if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) { |
2094 | CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
2095 | updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); |
2096 | } |
2097 | |
2098 | if (isa<InvokeInst>(Val: Entry.first)) |
2099 | if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) { |
2100 | II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
2101 | updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); |
2102 | } |
2103 | } |
2104 | } |
2105 | |
2106 | if (EntryDelta) { |
2107 | Callee->setEntryCount(Count: NewEntryCount); |
2108 | |
2109 | for (BasicBlock &BB : *Callee) |
2110 | // No need to update the callsite if it is pruned during inlining. |
2111 | if (!VMap || VMap->count(Val: &BB)) |
2112 | for (Instruction &I : BB) { |
2113 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) { |
2114 | CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
2115 | updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); |
2116 | } |
2117 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) { |
2118 | II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
2119 | updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); |
2120 | } |
2121 | } |
2122 | } |
2123 | } |
2124 | |
2125 | /// An operand bundle "clang.arc.attachedcall" on a call indicates the call |
2126 | /// result is implicitly consumed by a call to retainRV or claimRV immediately |
2127 | /// after the call. This function inlines the retainRV/claimRV calls. |
2128 | /// |
2129 | /// There are three cases to consider: |
2130 | /// |
2131 | /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned |
2132 | /// object in the callee return block, the autoreleaseRV call and the |
2133 | /// retainRV/claimRV call in the caller cancel out. If the call in the caller |
2134 | /// is a claimRV call, a call to objc_release is emitted. |
2135 | /// |
2136 | /// 2. If there is a call in the callee return block that doesn't have operand |
2137 | /// bundle "clang.arc.attachedcall", the operand bundle on the original call |
2138 | /// is transferred to the call in the callee. |
2139 | /// |
2140 | /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is |
2141 | /// a retainRV call. |
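/// A sketch of case 1 (illustrative only): if the callee's return block ends
/// in
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)   ; result unused
///   ret ptr %v
/// and the caller's call carries the retainRV form of the bundle, the
/// autoreleaseRV call is simply erased; for claimRV, a call to objc_release
/// of %v is emitted in its place.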
2142 | static void |
2143 | inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, |
2144 | const SmallVectorImpl<ReturnInst *> &Returns) { |
  assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2146 | bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, |
2147 | IsUnsafeClaimRV = !IsRetainRV; |
2148 | |
2149 | for (auto *RI : Returns) { |
2150 | Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0)); |
2151 | bool InsertRetainCall = IsRetainRV; |
2152 | IRBuilder<> Builder(RI->getContext()); |
2153 | |
2154 | // Walk backwards through the basic block looking for either a matching |
2155 | // autoreleaseRV call or an unannotated call. |
2156 | auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()), |
2157 | y: RI->getParent()->rend()); |
2158 | for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) { |
2159 | // Ignore casts. |
2160 | if (isa<CastInst>(Val: I)) |
2161 | continue; |
2162 | |
2163 | if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) { |
2164 | if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || |
2165 | !II->use_empty() || |
2166 | objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd) |
2167 | break; |
2168 | |
        // If we've found a matching autoreleaseRV call:
2170 | // - If claimRV is attached to the call, insert a call to objc_release |
2171 | // and erase the autoreleaseRV call. |
2172 | // - If retainRV is attached to the call, just erase the autoreleaseRV |
2173 | // call. |
2174 | if (IsUnsafeClaimRV) { |
2175 | Builder.SetInsertPoint(II); |
2176 | Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd); |
2177 | } |
2178 | II->eraseFromParent(); |
2179 | InsertRetainCall = false; |
2180 | break; |
2181 | } |
2182 | |
2183 | auto *CI = dyn_cast<CallInst>(Val: &I); |
2184 | |
2185 | if (!CI) |
2186 | break; |
2187 | |
2188 | if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd || |
2189 | objcarc::hasAttachedCallOpBundle(CB: CI)) |
2190 | break; |
2191 | |
2192 | // If we've found an unannotated call that defines RetOpnd, add a |
2193 | // "clang.arc.attachedcall" operand bundle. |
2194 | Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)}; |
      OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2196 | auto *NewCall = CallBase::addOperandBundle( |
2197 | CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator()); |
2198 | NewCall->copyMetadata(SrcInst: *CI); |
2199 | CI->replaceAllUsesWith(V: NewCall); |
2200 | CI->eraseFromParent(); |
2201 | InsertRetainCall = false; |
2202 | break; |
2203 | } |
2204 | |
2205 | if (InsertRetainCall) { |
2206 | // The retainRV is attached to the call and we've failed to find a |
2207 | // matching autoreleaseRV or an annotated call in the callee. Emit a call |
2208 | // to objc_retain. |
2209 | Builder.SetInsertPoint(RI); |
2210 | Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd); |
2211 | } |
2212 | } |
2213 | } |
2214 | |
2215 | // In contextual profiling, when an inline succeeds, we want to remap the |
2216 | // indices of the callee into the index space of the caller. We can't just leave |
2217 | // them as-is because the same callee may appear in other places in this caller |
2218 | // (other callsites), and its (callee's) counters and sub-contextual profile |
2219 | // tree would be potentially different. |
2220 | // Not all BBs of the callee may survive the opportunistic DCE InlineFunction |
2221 | // does (same goes for callsites in the callee). |
2222 | // We will return a pair of vectors, one for basic block IDs and one for |
2223 | // callsites. For such a vector V, V[Idx] will be -1 if the callee |
2224 | // instrumentation with index Idx did not survive inlining, and a new value |
2225 | // otherwise. |
2226 | // This function will update the caller's instrumentation intrinsics |
2227 | // accordingly, mapping indices as described above. We also replace the "name" |
2228 | // operand because we use it to distinguish between "own" instrumentation and |
2229 | // "from callee" instrumentation when performing the traversal of the CFG of the |
2230 | // caller. We traverse depth-first from the callsite's BB and up to the point we |
2231 | // hit BBs owned by the caller. |
2232 | // The return values will be then used to update the contextual |
2233 | // profile. Note: we only update the "name" and "index" operands in the |
2234 | // instrumentation intrinsics, we leave the hash and total nr of indices as-is, |
2235 | // it's not worth updating those. |
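// For example (illustrative): if the callee had 3 counters and only the ones
// originally at indices 1 and 2 survive inlining, the returned counter vector
// could be {-1, 7, 8}, where 7 and 8 are freshly allocated indices in the
// caller's counter space.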
2236 | static std::pair<std::vector<int64_t>, std::vector<int64_t>> |
2237 | remapIndices(Function &Caller, BasicBlock *StartBB, |
2238 | PGOContextualProfile &CtxProf, uint32_t CalleeCounters, |
2239 | uint32_t CalleeCallsites) { |
2240 | // We'll allocate a new ID to imported callsite counters and callsites. We're |
2241 | // using -1 to indicate a counter we delete. Most likely the entry ID, for |
2242 | // example, will be deleted - we don't want 2 IDs in the same BB, and the |
2243 | // entry would have been cloned in the callsite's old BB. |
2244 | std::vector<int64_t> CalleeCounterMap; |
2245 | std::vector<int64_t> CalleeCallsiteMap; |
2246 | CalleeCounterMap.resize(new_size: CalleeCounters, x: -1); |
2247 | CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1); |
2248 | |
2249 | auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { |
2250 | if (Ins.getNameValue() == &Caller) |
2251 | return false; |
2252 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
2253 | if (CalleeCounterMap[OldID] == -1) |
2254 | CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller); |
2255 | const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]); |
2256 | |
2257 | Ins.setNameValue(&Caller); |
2258 | Ins.setIndex(NewID); |
2259 | return true; |
2260 | }; |
2261 | |
2262 | auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool { |
2263 | if (Ins.getNameValue() == &Caller) |
2264 | return false; |
2265 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
2266 | if (CalleeCallsiteMap[OldID] == -1) |
2267 | CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller); |
2268 | const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]); |
2269 | |
2270 | Ins.setNameValue(&Caller); |
2271 | Ins.setIndex(NewID); |
2272 | return true; |
2273 | }; |
2274 | |
2275 | std::deque<BasicBlock *> Worklist; |
2276 | DenseSet<const BasicBlock *> Seen; |
2277 | // We will traverse the BBs starting from the callsite BB. The callsite BB |
2278 | // will have at least a BB ID - maybe its own, and in any case the one coming |
2279 | // from the cloned function's entry BB. The other BBs we'll start seeing from |
2280 | // there on may or may not have BB IDs. BBs with IDs belonging to our caller |
2281 | // are definitely not coming from the imported function and form a boundary |
2282 | // past which we don't need to traverse anymore. BBs may have no |
2283 | // instrumentation (because we originally inserted instrumentation as per |
2284 | // MST), in which case we'll traverse past them. An invariant we'll keep is |
2285 | // that a BB will have at most 1 BB ID. For example, in the callsite BB, we |
2286 | // will delete the callee BB's instrumentation. This doesn't result in |
2287 | // information loss: the entry BB of the callee will have the same count as |
2288 | // the callsite's BB. At the end of this traversal, all the callee's |
2289 | // instrumentation would be mapped into the caller's instrumentation index |
2290 | // space. Some of the callee's counters may be deleted (as mentioned, this |
2291 | // should result in no loss of information). |
2292 | Worklist.push_back(x: StartBB); |
2293 | while (!Worklist.empty()) { |
2294 | auto *BB = Worklist.front(); |
2295 | Worklist.pop_front(); |
2296 | bool Changed = false; |
2297 | auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB); |
2298 | if (BBID) { |
2299 | Changed |= RewriteInstrIfNeeded(*BBID); |
      // This may be the entry block from the inlined callee, coming into a BB
      // that didn't have instrumentation because of MST decisions. Let's make
      // sure it's placed accordingly. This is a noop elsewhere.
      BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
2304 | } |
2305 | for (auto &I : llvm::make_early_inc_range(Range&: *BB)) { |
2306 | if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) { |
2307 | if (isa<InstrProfIncrementInstStep>(Val: Inc)) { |
          // Step instrumentation is used for select instructions. Inlining
          // may have propagated a constant resulting in the condition of the
          // select being resolved, in which case function cloning resolves
          // the value of the select and elides the select instruction. If
          // that is the case, the step parameter of the instrumentation will
          // reflect that, and we can delete the instrumentation.
2314 | if (isa<Constant>(Val: Inc->getStep())) { |
2315 | assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode())); |
2316 | Inc->eraseFromParent(); |
2317 | } else { |
2318 | assert(isa_and_nonnull<SelectInst>(Inc->getNextNode())); |
2319 | RewriteInstrIfNeeded(*Inc); |
2320 | } |
2321 | } else if (Inc != BBID) { |
2322 | // If we're here it means that the BB had more than 1 IDs, presumably |
2323 | // some coming from the callee. We "made up our mind" to keep the |
2324 | // first one (which may or may not have been originally the caller's). |
2325 | // All the others are superfluous and we delete them. |
2326 | Inc->eraseFromParent(); |
2327 | Changed = true; |
2328 | } |
2329 | } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) { |
2330 | Changed |= RewriteCallsiteInsIfNeeded(*CS); |
2331 | } |
2332 | } |
2333 | if (!BBID || Changed) |
2334 | for (auto *Succ : successors(BB)) |
2335 | if (Seen.insert(V: Succ).second) |
2336 | Worklist.push_back(x: Succ); |
2337 | } |
2338 | |
  assert(!llvm::is_contained(CalleeCounterMap, 0) &&
         "Counter index mapping should be either to -1 or to non-zero index, "
         "because the 0 index corresponds to the entry BB of the caller");
  assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
         "Callsite index mapping should be either to -1 or to non-zero index, "
         "because there should have been at least a callsite - the inlined one "
         "- which would have had a 0 index.");
2347 | |
2348 | return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; |
2349 | } |
2350 | |
2351 | // Inline. If successful, update the contextual profile (if a valid one is |
2352 | // given). |
2353 | // The contextual profile data is organized in trees, as follows: |
2354 | // - each node corresponds to a function |
2355 | // - the root of each tree corresponds to an "entrypoint" - e.g. |
2356 | // RPC handler for server side |
2357 | // - the path from the root to a node is a particular call path |
2358 | // - the counters stored in a node are counter values observed in that |
2359 | // particular call path ("context") |
2360 | // - the edges between nodes are annotated with callsite IDs. |
2361 | // |
2362 | // Updating the contextual profile after an inlining means, at a high level, |
2363 | // copying over the data of the callee, **intentionally without any value |
2364 | // scaling**, and copying over the callees of the inlined callee. |
2365 | llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, |
2366 | PGOContextualProfile &CtxProf, |
2367 | bool MergeAttributes, |
2368 | AAResults *CalleeAAR, |
2369 | bool InsertLifetime, |
2370 | Function *ForwardVarArgsTo) { |
2371 | if (!CtxProf.isInSpecializedModule()) |
2372 | return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
2373 | ForwardVarArgsTo); |
2374 | |
2375 | auto &Caller = *CB.getCaller(); |
2376 | auto &Callee = *CB.getCalledFunction(); |
2377 | auto *StartBB = CB.getParent(); |
2378 | |
2379 | // Get some preliminary data about the callsite before it might get inlined. |
2380 | // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to |
2381 | // get this data upfront and rely less on InlineFunction's behavior. |
2382 | const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee); |
2383 | auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); |
2384 | const auto CallsiteID = |
2385 | static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue()); |
2386 | |
2387 | const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee); |
2388 | const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee); |
2389 | |
2390 | auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
2391 | ForwardVarArgsTo); |
2392 | if (!Ret.isSuccess()) |
2393 | return Ret; |
2394 | |
2395 | // Inlining succeeded, we don't need the instrumentation of the inlined |
2396 | // callsite. |
2397 | CallsiteIDIns->eraseFromParent(); |
2398 | |
// Assigning the maps first and then capturing references to them in the
// lambda, because captured structured bindings are a C++20 extension. We do
// still need a capture here, though.
2402 | const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf, |
2403 | CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites); |
const uint32_t NewCountersSize = CtxProf.getNumCounters(Caller);
2405 | |
2406 | auto Updater = [&](PGOCtxProfContext &Ctx) { |
2407 | assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); |
2408 | const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps; |
2409 | assert( |
2410 | (Ctx.counters().size() + |
2411 | llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == |
2412 | NewCountersSize) && |
2413 | "The caller's counters size should have grown by the number of new " |
2414 | "distinct counters inherited from the inlined callee." ); |
2415 | Ctx.resizeCounters(Size: NewCountersSize); |
// If the callsite wasn't exercised in this context, the counter values
// coming from it are 0 - which is what they are right now, after the
// resize - so we're done.
2419 | auto CSIt = Ctx.callsites().find(x: CallsiteID); |
2420 | if (CSIt == Ctx.callsites().end()) |
2421 | return; |
2422 | auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID); |
2423 | // The callsite was exercised, but not with this callee (so presumably this |
2424 | // is an indirect callsite). Again, we're done here. |
2425 | if (CalleeCtxIt == CSIt->second.end()) |
2426 | return; |
2427 | |
2428 | // Let's pull in the counter values and the subcontexts coming from the |
2429 | // inlined callee. |
2430 | auto &CalleeCtx = CalleeCtxIt->second; |
2431 | assert(CalleeCtx.guid() == CalleeGUID); |
2432 | |
2433 | for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { |
2434 | const int64_t NewIndex = CalleeCounterMap[I]; |
2435 | if (NewIndex >= 0) { |
2436 | assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 " |
2437 | "index, that's the caller's entry BB" ); |
2438 | Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; |
2439 | } |
2440 | } |
2441 | for (auto &[I, OtherSet] : CalleeCtx.callsites()) { |
2442 | const int64_t NewCSIdx = CalleeCallsiteMap[I]; |
2443 | if (NewCSIdx >= 0) { |
2444 | assert(NewCSIdx != 0 && |
2445 | "callsite index mapping shouldn't happen to a 0 index, the " |
2446 | "caller must've had at least one callsite (with such an index)" ); |
2447 | Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet)); |
2448 | } |
2449 | } |
// We know the traversal is preorder, so it hasn't yet visited the
// sub-contexts of the context it's currently visiting. That means the
// erase below invalidates no iterators the traversal still needs.
2453 | auto Deleted = Ctx.callsites().erase(x: CallsiteID); |
2454 | assert(Deleted); |
2455 | (void)Deleted; |
2456 | }; |
2457 | CtxProf.update(Updater, F: Caller); |
2458 | return Ret; |
2459 | } |
2460 | |
2461 | /// This function inlines the called function into the basic block of the |
/// caller. It returns a failure InlineResult if it is not possible to inline
/// this call. The program is still in a well-defined state if this occurs,
/// though.
2464 | /// |
2465 | /// Note that this only does one level of inlining. For example, if the |
2466 | /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now |
2467 | /// exists in the instruction stream. Similarly this will inline a recursive |
2468 | /// function by one level. |
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2470 | bool MergeAttributes, |
2471 | AAResults *CalleeAAR, |
2472 | bool InsertLifetime, |
2473 | Function *ForwardVarArgsTo, |
2474 | OptimizationRemarkEmitter *ORE) { |
2475 | assert(CB.getParent() && CB.getFunction() && "Instruction not in function!" ); |
2476 | |
2477 | // FIXME: we don't inline callbr yet. |
2478 | if (isa<CallBrInst>(Val: CB)) |
2479 | return InlineResult::failure(Reason: "We don't inline callbr yet." ); |
2480 | |
2481 | // If IFI has any state in it, zap it before we fill it in. |
2482 | IFI.reset(); |
2483 | |
2484 | Function *CalledFunc = CB.getCalledFunction(); |
2485 | if (!CalledFunc || // Can't inline external function or indirect |
2486 | CalledFunc->isDeclaration()) // call! |
2487 | return InlineResult::failure(Reason: "external or indirect" ); |
2488 | |
2489 | // The inliner does not know how to inline through calls with operand bundles |
2490 | // in general ... |
2491 | Value *ConvergenceControlToken = nullptr; |
2492 | if (CB.hasOperandBundles()) { |
2493 | for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { |
2494 | auto OBUse = CB.getOperandBundleAt(Index: i); |
2495 | uint32_t Tag = OBUse.getTagID(); |
2496 | // ... but it knows how to inline through "deopt" operand bundles ... |
2497 | if (Tag == LLVMContext::OB_deopt) |
2498 | continue; |
2499 | // ... and "funclet" operand bundles. |
2500 | if (Tag == LLVMContext::OB_funclet) |
2501 | continue; |
2502 | if (Tag == LLVMContext::OB_clang_arc_attachedcall) |
2503 | continue; |
2504 | if (Tag == LLVMContext::OB_kcfi) |
2505 | continue; |
2506 | if (Tag == LLVMContext::OB_convergencectrl) { |
2507 | ConvergenceControlToken = OBUse.Inputs[0].get(); |
2508 | continue; |
2509 | } |
2510 | |
2511 | return InlineResult::failure(Reason: "unsupported operand bundle" ); |
2512 | } |
2513 | } |
2514 | |
2515 | // FIXME: The check below is redundant and incomplete. According to spec, if a |
2516 | // convergent call is missing a token, then the caller is using uncontrolled |
2517 | // convergence. If the callee has an entry intrinsic, then the callee is using |
2518 | // controlled convergence, and the call cannot be inlined. A proper |
// implementation of this check requires a whole new analysis that identifies
2520 | // convergence in every function. For now, we skip that and just do this one |
2521 | // cursory check. The underlying assumption is that in a compiler flow that |
2522 | // fully implements convergence control tokens, there is no mixing of |
2523 | // controlled and uncontrolled convergent operations in the whole program. |
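// For illustration (IR sketch, not taken from this file): under controlled
// convergence a convergent call site carries a token defined by an entry
// intrinsic, e.g.
//   %t = call token @llvm.experimental.convergence.entry()
//   call void @g() [ "convergencectrl"(token %t) ]
// and the check below gives up on inlining when a convergent call site has
// no such token but the callee's entry block defines one.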
2524 | if (CB.isConvergent()) { |
2525 | if (!ConvergenceControlToken && |
2526 | getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) { |
2527 | return InlineResult::failure( |
2528 | Reason: "convergent call needs convergencectrl operand" ); |
2529 | } |
2530 | } |
2531 | |
2532 | // If the call to the callee cannot throw, set the 'nounwind' flag on any |
2533 | // calls that we inline. |
2534 | bool MarkNoUnwind = CB.doesNotThrow(); |
2535 | |
2536 | BasicBlock *OrigBB = CB.getParent(); |
2537 | Function *Caller = OrigBB->getParent(); |
2538 | |
2539 | // GC poses two hazards to inlining, which only occur when the callee has GC: |
2540 | // 1. If the caller has no GC, then the callee's GC must be propagated to the |
2541 | // caller. |
2542 | // 2. If the caller has a differing GC, it is invalid to inline. |
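// For instance (strategy names are only examples): a callee compiled with
// gc "statepoint-example" can be inlined into a caller that has no GC
// attribute - the caller simply adopts that strategy - but not into a
// caller already using a different strategy such as "coreclr".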
2543 | if (CalledFunc->hasGC()) { |
2544 | if (!Caller->hasGC()) |
2545 | Caller->setGC(CalledFunc->getGC()); |
2546 | else if (CalledFunc->getGC() != Caller->getGC()) |
2547 | return InlineResult::failure(Reason: "incompatible GC" ); |
2548 | } |
2549 | |
2550 | // Get the personality function from the callee if it contains a landing pad. |
2551 | Constant *CalledPersonality = |
2552 | CalledFunc->hasPersonalityFn() |
2553 | ? CalledFunc->getPersonalityFn()->stripPointerCasts() |
2554 | : nullptr; |
2555 | |
2556 | // Find the personality function used by the landing pads of the caller. If it |
2557 | // exists, then check to see that it matches the personality function used in |
2558 | // the callee. |
2559 | Constant *CallerPersonality = |
2560 | Caller->hasPersonalityFn() |
2561 | ? Caller->getPersonalityFn()->stripPointerCasts() |
2562 | : nullptr; |
2563 | if (CalledPersonality) { |
2564 | if (!CallerPersonality) |
2565 | Caller->setPersonalityFn(CalledPersonality); |
2566 | // If the personality functions match, then we can perform the |
2567 | // inlining. Otherwise, we can't inline. |
2568 | // TODO: This isn't 100% true. Some personality functions are proper |
2569 | // supersets of others and can be used in place of the other. |
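// For example (illustrative): a callee whose landing pads were emitted for
// __gxx_personality_v0 is not inlined into a caller whose personality is
// __CxxFrameHandler3, even though both are EH personality functions.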
2570 | else if (CalledPersonality != CallerPersonality) |
2571 | return InlineResult::failure(Reason: "incompatible personality" ); |
2572 | } |
2573 | |
2574 | // We need to figure out which funclet the callsite was in so that we may |
2575 | // properly nest the callee. |
2576 | Instruction *CallSiteEHPad = nullptr; |
2577 | if (CallerPersonality) { |
2578 | EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality); |
2579 | if (isScopedEHPersonality(Pers: Personality)) { |
2580 | std::optional<OperandBundleUse> ParentFunclet = |
2581 | CB.getOperandBundle(ID: LLVMContext::OB_funclet); |
2582 | if (ParentFunclet) |
2583 | CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front()); |
2584 | |
2585 | // OK, the inlining site is legal. What about the target function? |
2586 | |
2587 | if (CallSiteEHPad) { |
2588 | if (Personality == EHPersonality::MSVC_CXX) { |
2589 | // The MSVC personality cannot tolerate catches getting inlined into |
2590 | // cleanup funclets. |
2591 | if (isa<CleanupPadInst>(Val: CallSiteEHPad)) { |
2592 | // Ok, the call site is within a cleanuppad. Let's check the callee |
2593 | // for catchpads. |
2594 | for (const BasicBlock &CalledBB : *CalledFunc) { |
2595 | if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt())) |
2596 | return InlineResult::failure(Reason: "catch in cleanup funclet" ); |
2597 | } |
2598 | } |
2599 | } else if (isAsynchronousEHPersonality(Pers: Personality)) { |
// SEH is even less tolerant; there may not be any sort of exceptional
2601 | // funclet in the callee. |
2602 | for (const BasicBlock &CalledBB : *CalledFunc) { |
2603 | if (CalledBB.isEHPad()) |
2604 | return InlineResult::failure(Reason: "SEH in cleanup funclet" ); |
2605 | } |
2606 | } |
2607 | } |
2608 | } |
2609 | } |
2610 | |
2611 | // Determine if we are dealing with a call in an EHPad which does not unwind |
2612 | // to caller. |
2613 | bool EHPadForCallUnwindsLocally = false; |
2614 | if (CallSiteEHPad && isa<CallInst>(Val: CB)) { |
2615 | UnwindDestMemoTy FuncletUnwindMap; |
2616 | Value *CallSiteUnwindDestToken = |
2617 | getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap); |
2618 | |
2619 | EHPadForCallUnwindsLocally = |
2620 | CallSiteUnwindDestToken && |
2621 | !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken); |
2622 | } |
2623 | |
2624 | // Get an iterator to the last basic block in the function, which will have |
2625 | // the new function inlined after it. |
2626 | Function::iterator LastBlock = --Caller->end(); |
2627 | |
2628 | // Make sure to capture all of the return instructions from the cloned |
2629 | // function. |
2630 | SmallVector<ReturnInst*, 8> Returns; |
2631 | ClonedCodeInfo InlinedFunctionInfo; |
2632 | Function::iterator FirstNewBlock; |
2633 | |
2634 | { // Scope to destroy VMap after cloning. |
2635 | ValueToValueMapTy VMap; |
2636 | struct ByValInit { |
2637 | Value *Dst; |
2638 | Value *Src; |
2639 | MaybeAlign SrcAlign; |
2640 | Type *Ty; |
2641 | }; |
// Keep a list of (dst, src, src_align, byval type) records to emit byval
// initializations. The source alignment is only available through the
// CallBase, so it has to be saved here.
2645 | SmallVector<ByValInit, 4> ByValInits; |
2646 | |
2647 | // When inlining a function that contains noalias scope metadata, |
2648 | // this metadata needs to be cloned so that the inlined blocks |
2649 | // have different "unique scopes" at every call site. |
2650 | // Track the metadata that must be cloned. Do this before other changes to |
2651 | // the function, so that we do not get in trouble when inlining caller == |
2652 | // callee. |
2653 | ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); |
2654 | |
2655 | auto &DL = Caller->getDataLayout(); |
2656 | |
2657 | // Calculate the vector of arguments to pass into the function cloner, which |
2658 | // matches up the formal to the actual argument values. |
2659 | auto AI = CB.arg_begin(); |
2660 | unsigned ArgNo = 0; |
2661 | for (Function::arg_iterator I = CalledFunc->arg_begin(), |
2662 | E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { |
2663 | Value *ActualArg = *AI; |
2664 | |
// When byval arguments are actually inlined, we need to make the copy they
// imply explicit. However, we don't do this if the callee is readonly or
// readnone, because then the copy would be unneeded: the callee doesn't
// modify the struct.
2669 | if (CB.isByValArgument(ArgNo)) { |
2670 | ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg, |
2671 | TheCall: &CB, CalledFunc, IFI, |
2672 | ByValAlignment: CalledFunc->getParamAlign(ArgNo)); |
2673 | if (ActualArg != *AI) |
2674 | ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI, |
2675 | .SrcAlign: CB.getParamAlign(ArgNo), |
2676 | .Ty: CB.getParamByValType(ArgNo)}); |
2677 | } |
2678 | |
2679 | VMap[&*I] = ActualArg; |
2680 | } |
2681 | |
2682 | // TODO: Remove this when users have been updated to the assume bundles. |
2683 | // Add alignment assumptions if necessary. We do this before the inlined |
2684 | // instructions are actually cloned into the caller so that we can easily |
2685 | // check what will be known at the start of the inlined code. |
2686 | AddAlignmentAssumptions(CB, IFI); |
2687 | |
2688 | AssumptionCache *AC = |
2689 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
2690 | |
// Preserve all attributes of the call and its parameters.
2692 | salvageKnowledge(I: &CB, AC); |
2693 | |
2694 | // We want the inliner to prune the code as it copies. We would LOVE to |
2695 | // have no dead or constant instructions leftover after inlining occurs |
2696 | // (which can happen, e.g., because an argument was constant), but we'll be |
2697 | // happy with whatever the cloner can do. |
2698 | CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap, |
2699 | /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i" , |
2700 | CodeInfo: &InlinedFunctionInfo); |
2701 | // Remember the first block that is newly cloned over. |
2702 | FirstNewBlock = LastBlock; ++FirstNewBlock; |
2703 | |
// Insert retainRV/claimRV runtime calls.
2705 | objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB); |
2706 | if (RVCallKind != objcarc::ARCInstKind::None) |
2707 | inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns); |
2708 | |
// Update caller/callee profiles only when requested. For sample loader
2710 | // inlining, the context-sensitive inlinee profile doesn't need to be |
2711 | // subtracted from callee profile, and the inlined clone also doesn't need |
2712 | // to be scaled based on call site count. |
2713 | if (IFI.UpdateProfile) { |
2714 | if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) |
2715 | // Update the BFI of blocks cloned into the caller. |
2716 | updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI, |
2717 | CalleeEntryBlock: CalledFunc->front()); |
2718 | |
2719 | if (auto Profile = CalledFunc->getEntryCount()) |
2720 | updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI, |
2721 | CallerBFI: IFI.CallerBFI); |
2722 | } |
2723 | |
2724 | // Inject byval arguments initialization. |
2725 | for (ByValInit &Init : ByValInits) |
2726 | HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign, |
2727 | M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI, |
2728 | CalledFunc); |
2729 | |
2730 | std::optional<OperandBundleUse> ParentDeopt = |
2731 | CB.getOperandBundle(ID: LLVMContext::OB_deopt); |
2732 | if (ParentDeopt) { |
2733 | SmallVector<OperandBundleDef, 2> OpDefs; |
2734 | |
2735 | for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { |
2736 | CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH); |
2737 | if (!ICS) |
2738 | continue; // instruction was DCE'd or RAUW'ed to undef |
2739 | |
2740 | OpDefs.clear(); |
2741 | |
2742 | OpDefs.reserve(N: ICS->getNumOperandBundles()); |
2743 | |
2744 | for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; |
2745 | ++COBi) { |
2746 | auto ChildOB = ICS->getOperandBundleAt(Index: COBi); |
2747 | if (ChildOB.getTagID() != LLVMContext::OB_deopt) { |
2748 | // If the inlined call has other operand bundles, let them be |
2749 | OpDefs.emplace_back(Args&: ChildOB); |
2750 | continue; |
2751 | } |
2752 | |
2753 | // It may be useful to separate this logic (of handling operand |
2754 | // bundles) out to a separate "policy" component if this gets crowded. |
2755 | // Prepend the parent's deoptimization continuation to the newly |
2756 | // inlined call's deoptimization continuation. |
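// For example (operands are illustrative): if the parent call site carries
// "deopt"(i32 1, i32 2) and the inlined call carries "deopt"(i32 3), the
// rewritten call will carry "deopt"(i32 1, i32 2, i32 3).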
2757 | std::vector<Value *> MergedDeoptArgs; |
2758 | MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() + |
2759 | ChildOB.Inputs.size()); |
2760 | |
2761 | llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs); |
2762 | llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs); |
2763 | |
2764 | OpDefs.emplace_back(Args: "deopt" , Args: std::move(MergedDeoptArgs)); |
2765 | } |
2766 | |
2767 | Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator()); |
2768 | |
2769 | // Note: the RAUW does the appropriate fixup in VMap, so we need to do |
2770 | // this even if the call returns void. |
2771 | ICS->replaceAllUsesWith(V: NewI); |
2772 | |
2773 | VH = nullptr; |
2774 | ICS->eraseFromParent(); |
2775 | } |
2776 | } |
2777 | |
2778 | // For 'nodebug' functions, the associated DISubprogram is always null. |
2779 | // Conservatively avoid propagating the callsite debug location to |
2780 | // instructions inlined from a function whose DISubprogram is not null. |
2781 | fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB, |
2782 | CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr); |
2783 | |
2784 | if (isAssignmentTrackingEnabled(M: *Caller->getParent())) { |
2785 | // Interpret inlined stores to caller-local variables as assignments. |
2786 | trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB); |
2787 | |
2788 | // Update DIAssignID metadata attachments and uses so that they are |
2789 | // unique to this inlined instance. |
2790 | fixupAssignments(Start: FirstNewBlock, End: Caller->end()); |
2791 | } |
2792 | |
2793 | // Now clone the inlined noalias scope metadata. |
2794 | SAMetadataCloner.clone(); |
2795 | SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end()); |
2796 | |
2797 | // Add noalias metadata if necessary. |
2798 | AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); |
2799 | |
2800 | // Clone return attributes on the callsite into the calls within the inlined |
2801 | // function which feed into its return value. |
2802 | AddReturnAttributes(CB, VMap, InlinedFunctionInfo); |
2803 | |
2804 | // Clone attributes on the params of the callsite to calls within the |
2805 | // inlined function which use the same param. |
2806 | AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo); |
2807 | |
2808 | propagateMemProfMetadata( |
2809 | Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE); |
2810 | |
2811 | // Propagate metadata on the callsite if necessary. |
2812 | PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end()); |
2813 | |
2814 | // Register any cloned assumptions. |
2815 | if (IFI.GetAssumptionCache) |
2816 | for (BasicBlock &NewBlock : |
2817 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
2818 | for (Instruction &I : NewBlock) |
2819 | if (auto *II = dyn_cast<AssumeInst>(Val: &I)) |
2820 | IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II); |
2821 | } |
2822 | |
2823 | if (ConvergenceControlToken) { |
2824 | IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock); |
2825 | if (IntrinsicCall) { |
2826 | IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken); |
2827 | IntrinsicCall->eraseFromParent(); |
2828 | } |
2829 | } |
2830 | |
// If there are any alloca instructions in the block that used to be the entry
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before: we insert them
// at the beginning of the caller's entry block, ahead of its existing code.
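// Only allocas with a constant size that live in the entry block are treated
// as static allocas (fixed stack slots), which is why the eligible ones are
// hoisted here instead of being left in the inlined blocks.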
2835 | { |
2836 | BasicBlock::iterator InsertPoint = Caller->begin()->begin(); |
2837 | for (BasicBlock::iterator I = FirstNewBlock->begin(), |
2838 | E = FirstNewBlock->end(); I != E; ) { |
2839 | AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++); |
2840 | if (!AI) continue; |
2841 | |
2842 | // If the alloca is now dead, remove it. This often occurs due to code |
2843 | // specialization. |
2844 | if (AI->use_empty()) { |
2845 | AI->eraseFromParent(); |
2846 | continue; |
2847 | } |
2848 | |
2849 | if (!allocaWouldBeStaticInEntry(AI)) |
2850 | continue; |
2851 | |
2852 | // Keep track of the static allocas that we inline into the caller. |
2853 | IFI.StaticAllocas.push_back(Elt: AI); |
2854 | |
2855 | // Scan for the block of allocas that we can move over, and move them |
2856 | // all at once. |
2857 | while (isa<AllocaInst>(Val: I) && |
2858 | !cast<AllocaInst>(Val&: I)->use_empty() && |
2859 | allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) { |
2860 | IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I)); |
2861 | ++I; |
2862 | } |
2863 | |
2864 | // Transfer all of the allocas over in a block. Using splice means |
2865 | // that the instructions aren't removed from the symbol table, then |
2866 | // reinserted. |
2867 | I.setTailBit(true); |
2868 | Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock, |
2869 | FromBeginIt: AI->getIterator(), FromEndIt: I); |
2870 | } |
2871 | } |
2872 | |
2873 | SmallVector<Value*,4> VarArgsToForward; |
2874 | SmallVector<AttributeSet, 4> VarArgsAttrs; |
2875 | for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); |
2876 | i < CB.arg_size(); i++) { |
2877 | VarArgsToForward.push_back(Elt: CB.getArgOperand(i)); |
2878 | VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i)); |
2879 | } |
2880 | |
2881 | bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; |
2882 | if (InlinedFunctionInfo.ContainsCalls) { |
2883 | CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; |
2884 | if (CallInst *CI = dyn_cast<CallInst>(Val: &CB)) |
2885 | CallSiteTailKind = CI->getTailCallKind(); |
2886 | |
2887 | // For inlining purposes, the "notail" marker is the same as no marker. |
2888 | if (CallSiteTailKind == CallInst::TCK_NoTail) |
2889 | CallSiteTailKind = CallInst::TCK_None; |
2890 | |
2891 | for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; |
2892 | ++BB) { |
2893 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
2894 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
2895 | if (!CI) |
2896 | continue; |
2897 | |
2898 | // Forward varargs from inlined call site to calls to the |
2899 | // ForwardVarArgsTo function, if requested, and to musttail calls. |
2900 | if (!VarArgsToForward.empty() && |
2901 | ((ForwardVarArgsTo && |
2902 | CI->getCalledFunction() == ForwardVarArgsTo) || |
2903 | CI->isMustTailCall())) { |
2904 | // Collect attributes for non-vararg parameters. |
2905 | AttributeList Attrs = CI->getAttributes(); |
2906 | SmallVector<AttributeSet, 8> ArgAttrs; |
2907 | if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { |
2908 | for (unsigned ArgNo = 0; |
2909 | ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) |
2910 | ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo)); |
2911 | } |
2912 | |
2913 | // Add VarArg attributes. |
2914 | ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end()); |
2915 | Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(), |
2916 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs); |
2917 | // Add VarArgs to existing parameters. |
2918 | SmallVector<Value *, 6> Params(CI->args()); |
2919 | Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end()); |
2920 | CallInst *NewCI = CallInst::Create( |
2921 | Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "" , InsertBefore: CI->getIterator()); |
2922 | NewCI->setDebugLoc(CI->getDebugLoc()); |
2923 | NewCI->setAttributes(Attrs); |
2924 | NewCI->setCallingConv(CI->getCallingConv()); |
2925 | CI->replaceAllUsesWith(V: NewCI); |
2926 | CI->eraseFromParent(); |
2927 | CI = NewCI; |
2928 | } |
2929 | |
2930 | if (Function *F = CI->getCalledFunction()) |
2931 | InlinedDeoptimizeCalls |= |
2932 | F->getIntrinsicID() == Intrinsic::experimental_deoptimize; |
2933 | |
2934 | // We need to reduce the strength of any inlined tail calls. For |
2935 | // musttail, we have to avoid introducing potential unbounded stack |
2936 | // growth. For example, if functions 'f' and 'g' are mutually recursive |
2937 | // with musttail, we can inline 'g' into 'f' so long as we preserve |
2938 | // musttail on the cloned call to 'f'. If either the inlined call site |
2939 | // or the cloned call site is *not* musttail, the program already has |
2940 | // one frame of stack growth, so it's safe to remove musttail. Here is |
2941 | // a table of example transformations: |
2942 | // |
2943 | // f -> musttail g -> musttail f ==> f -> musttail f |
2944 | // f -> musttail g -> tail f ==> f -> tail f |
2945 | // f -> g -> musttail f ==> f -> f |
2946 | // f -> g -> tail f ==> f -> f |
2947 | // |
2948 | // Inlined notail calls should remain notail calls. |
2949 | CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); |
2950 | if (ChildTCK != CallInst::TCK_NoTail) |
2951 | ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK); |
2952 | CI->setTailCallKind(ChildTCK); |
2953 | InlinedMustTailCalls |= CI->isMustTailCall(); |
2954 | |
2955 | // Call sites inlined through a 'nounwind' call site should be |
2956 | // 'nounwind' as well. However, avoid marking call sites explicitly |
2957 | // where possible. This helps expose more opportunities for CSE after |
2958 | // inlining, commonly when the callee is an intrinsic. |
2959 | if (MarkNoUnwind && !CI->doesNotThrow()) |
2960 | CI->setDoesNotThrow(); |
2961 | } |
2962 | } |
2963 | } |
2964 | |
// Leave lifetime markers for the static allocas, scoping them to the
2966 | // function we just inlined. |
2967 | // We need to insert lifetime intrinsics even at O0 to avoid invalid |
2968 | // access caused by multithreaded coroutines. The check |
2969 | // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. |
2970 | if ((InsertLifetime || Caller->isPresplitCoroutine()) && |
2971 | !IFI.StaticAllocas.empty()) { |
2972 | IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin()); |
2973 | for (AllocaInst *AI : IFI.StaticAllocas) { |
2974 | // Don't mark swifterror allocas. They can't have bitcast uses. |
2975 | if (AI->isSwiftError()) |
2976 | continue; |
2977 | |
2978 | // If the alloca is already scoped to something smaller than the whole |
2979 | // function then there's no need to add redundant, less accurate markers. |
2980 | if (hasLifetimeMarkers(AI)) |
2981 | continue; |
2982 | |
2983 | // Try to determine the size of the allocation. |
2984 | ConstantInt *AllocaSize = nullptr; |
2985 | if (ConstantInt *AIArraySize = |
2986 | dyn_cast<ConstantInt>(Val: AI->getArraySize())) { |
2987 | auto &DL = Caller->getDataLayout(); |
2988 | Type *AllocaType = AI->getAllocatedType(); |
2989 | TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType); |
2990 | uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); |
2991 | |
2992 | // Don't add markers for zero-sized allocas. |
2993 | if (AllocaArraySize == 0) |
2994 | continue; |
2995 | |
2996 | // Check that array size doesn't saturate uint64_t and doesn't |
2997 | // overflow when it's multiplied by type size. |
2998 | if (!AllocaTypeSize.isScalable() && |
2999 | AllocaArraySize != std::numeric_limits<uint64_t>::max() && |
3000 | std::numeric_limits<uint64_t>::max() / AllocaArraySize >= |
3001 | AllocaTypeSize.getFixedValue()) { |
3002 | AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()), |
3003 | V: AllocaArraySize * AllocaTypeSize); |
3004 | } |
3005 | } |
3006 | |
3007 | builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize); |
3008 | for (ReturnInst *RI : Returns) { |
3009 | // Don't insert llvm.lifetime.end calls between a musttail or deoptimize |
3010 | // call and a return. The return kills all local allocas. |
3011 | if (InlinedMustTailCalls && |
3012 | RI->getParent()->getTerminatingMustTailCall()) |
3013 | continue; |
3014 | if (InlinedDeoptimizeCalls && |
3015 | RI->getParent()->getTerminatingDeoptimizeCall()) |
3016 | continue; |
3017 | IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize); |
3018 | } |
3019 | } |
3020 | } |
3021 | |
3022 | // If the inlined code contained dynamic alloca instructions, wrap the inlined |
3023 | // code with llvm.stacksave/llvm.stackrestore intrinsics. |
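// Sketch of the net effect (IR is illustrative):
//   %sp = call ptr @llvm.stacksave()
//   ... inlined code containing the dynamic allocas ...
//   call void @llvm.stackrestore(ptr %sp)
// so stack space allocated by the inlined dynamic allocas is released on
// every path that returns to the caller.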
3024 | if (InlinedFunctionInfo.ContainsDynamicAllocas) { |
3025 | // Insert the llvm.stacksave. |
3026 | CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) |
3027 | .CreateStackSave(Name: "savedstack" ); |
3028 | |
3029 | // Insert a call to llvm.stackrestore before any return instructions in the |
3030 | // inlined function. |
3031 | for (ReturnInst *RI : Returns) { |
3032 | // Don't insert llvm.stackrestore calls between a musttail or deoptimize |
3033 | // call and a return. The return will restore the stack pointer. |
3034 | if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) |
3035 | continue; |
3036 | if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) |
3037 | continue; |
3038 | IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr); |
3039 | } |
3040 | } |
3041 | |
3042 | // If we are inlining for an invoke instruction, we must make sure to rewrite |
3043 | // any call instructions into invoke instructions. This is sensitive to which |
3044 | // funclet pads were top-level in the inlinee, so must be done before |
3045 | // rewriting the "parent pad" links. |
3046 | if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3047 | BasicBlock *UnwindDest = II->getUnwindDest(); |
3048 | BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt(); |
3049 | if (isa<LandingPadInst>(Val: FirstNonPHI)) { |
3050 | HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
3051 | } else { |
3052 | HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
3053 | } |
3054 | } |
3055 | |
3056 | // Update the lexical scopes of the new funclets and callsites. |
3057 | // Anything that had 'none' as its parent is now nested inside the callsite's |
3058 | // EHPad. |
3059 | if (CallSiteEHPad) { |
3060 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
3061 | E = Caller->end(); |
3062 | BB != E; ++BB) { |
3063 | // Add bundle operands to inlined call sites. |
3064 | PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad); |
3065 | |
3066 | // It is problematic if the inlinee has a cleanupret which unwinds to |
3067 | // caller and we inline it into a call site which doesn't unwind but into |
3068 | // an EH pad that does. Such an edge must be dynamically unreachable. |
3069 | // As such, we replace the cleanupret with unreachable. |
3070 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) |
3071 | if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) |
3072 | changeToUnreachable(I: CleanupRet); |
3073 | |
3074 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
3075 | if (!I->isEHPad()) |
3076 | continue; |
3077 | |
3078 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
3079 | if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad())) |
3080 | CatchSwitch->setParentPad(CallSiteEHPad); |
3081 | } else { |
3082 | auto *FPI = cast<FuncletPadInst>(Val&: I); |
3083 | if (isa<ConstantTokenNone>(Val: FPI->getParentPad())) |
3084 | FPI->setParentPad(CallSiteEHPad); |
3085 | } |
3086 | } |
3087 | } |
3088 | |
3089 | if (InlinedDeoptimizeCalls) { |
3090 | // We need to at least remove the deoptimizing returns from the Return set, |
3091 | // so that the control flow from those returns does not get merged into the |
3092 | // caller (but terminate it instead). If the caller's return type does not |
3093 | // match the callee's return type, we also need to change the return type of |
3094 | // the intrinsic. |
3095 | if (Caller->getReturnType() == CB.getType()) { |
3096 | llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) { |
3097 | return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; |
3098 | }); |
3099 | } else { |
3100 | SmallVector<ReturnInst *, 8> NormalReturns; |
3101 | Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration( |
3102 | M: Caller->getParent(), id: Intrinsic::experimental_deoptimize, |
3103 | Tys: {Caller->getReturnType()}); |
3104 | |
3105 | for (ReturnInst *RI : Returns) { |
3106 | CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); |
3107 | if (!DeoptCall) { |
3108 | NormalReturns.push_back(Elt: RI); |
3109 | continue; |
3110 | } |
3111 | |
3112 | // The calling convention on the deoptimize call itself may be bogus, |
3113 | // since the code we're inlining may have undefined behavior (and may |
3114 | // never actually execute at runtime); but all |
3115 | // @llvm.experimental.deoptimize declarations have to have the same |
3116 | // calling convention in a well-formed module. |
3117 | auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); |
3118 | NewDeoptIntrinsic->setCallingConv(CallingConv); |
3119 | auto *CurBB = RI->getParent(); |
3120 | RI->eraseFromParent(); |
3121 | |
3122 | SmallVector<Value *, 4> CallArgs(DeoptCall->args()); |
3123 | |
3124 | SmallVector<OperandBundleDef, 1> OpBundles; |
3125 | DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles); |
3126 | auto DeoptAttributes = DeoptCall->getAttributes(); |
3127 | DeoptCall->eraseFromParent(); |
3128 | assert(!OpBundles.empty() && |
3129 | "Expected at least the deopt operand bundle" ); |
3130 | |
3131 | IRBuilder<> Builder(CurBB); |
3132 | CallInst *NewDeoptCall = |
3133 | Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles); |
3134 | NewDeoptCall->setCallingConv(CallingConv); |
3135 | NewDeoptCall->setAttributes(DeoptAttributes); |
3136 | if (NewDeoptCall->getType()->isVoidTy()) |
3137 | Builder.CreateRetVoid(); |
3138 | else |
3139 | Builder.CreateRet(V: NewDeoptCall); |
3140 | // Since the ret type is changed, remove the incompatible attributes. |
3141 | NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible( |
3142 | Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes())); |
3143 | } |
3144 | |
3145 | // Leave behind the normal returns so we can merge control flow. |
3146 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
3147 | } |
3148 | } |
3149 | |
3150 | // Handle any inlined musttail call sites. In order for a new call site to be |
3151 | // musttail, the source of the clone and the inlined call site must have been |
3152 | // musttail. Therefore it's safe to return without merging control into the |
3153 | // phi below. |
3154 | if (InlinedMustTailCalls) { |
3155 | // Check if we need to bitcast the result of any musttail calls. |
3156 | Type *NewRetTy = Caller->getReturnType(); |
3157 | bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; |
3158 | |
3159 | // Handle the returns preceded by musttail calls separately. |
3160 | SmallVector<ReturnInst *, 8> NormalReturns; |
3161 | for (ReturnInst *RI : Returns) { |
3162 | CallInst *ReturnedMustTail = |
3163 | RI->getParent()->getTerminatingMustTailCall(); |
3164 | if (!ReturnedMustTail) { |
3165 | NormalReturns.push_back(Elt: RI); |
3166 | continue; |
3167 | } |
3168 | if (!NeedBitCast) |
3169 | continue; |
3170 | |
3171 | // Delete the old return and any preceding bitcast. |
3172 | BasicBlock *CurBB = RI->getParent(); |
3173 | auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue()); |
3174 | RI->eraseFromParent(); |
3175 | if (OldCast) |
3176 | OldCast->eraseFromParent(); |
3177 | |
3178 | // Insert a new bitcast and return with the right type. |
3179 | IRBuilder<> Builder(CurBB); |
3180 | Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy)); |
3181 | } |
3182 | |
3183 | // Leave behind the normal returns so we can merge control flow. |
3184 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
3185 | } |
3186 | |
3187 | // Now that all of the transforms on the inlined code have taken place but |
3188 | // before we splice the inlined code into the CFG and lose track of which |
3189 | // blocks were actually inlined, collect the call sites. We only do this if |
3190 | // call graph updates weren't requested, as those provide value handle based |
3191 | // tracking of inlined call sites instead. Calls to intrinsics are not |
3192 | // collected because they are not inlineable. |
3193 | if (InlinedFunctionInfo.ContainsCalls) { |
3194 | // Otherwise just collect the raw call sites that were inlined. |
3195 | for (BasicBlock &NewBB : |
3196 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
3197 | for (Instruction &I : NewBB) |
3198 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) |
3199 | if (!(CB->getCalledFunction() && |
3200 | CB->getCalledFunction()->isIntrinsic())) |
3201 | IFI.InlinedCallSites.push_back(Elt: CB); |
3202 | } |
3203 | |
3204 | // If we cloned in _exactly one_ basic block, and if that block ends in a |
3205 | // return instruction, we splice the body of the inlined callee directly into |
3206 | // the calling basic block. |
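// Roughly (illustrative): a calling block [ ...; %r = call i32 @callee(); ... ]
// becomes [ ...; <inlined body>; ... ], with uses of %r rewritten below to
// the value returned by the callee's single return instruction.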
3207 | if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) { |
3208 | // Move all of the instructions right before the call. |
3209 | OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(), |
3210 | FromEndIt: FirstNewBlock->end()); |
3211 | // Remove the cloned basic block. |
3212 | Caller->back().eraseFromParent(); |
3213 | |
3214 | // If the call site was an invoke instruction, add a branch to the normal |
3215 | // destination. |
3216 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3217 | BranchInst *NewBr = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
3218 | NewBr->setDebugLoc(Returns[0]->getDebugLoc()); |
3219 | } |
3220 | |
3221 | // If the return instruction returned a value, replace uses of the call with |
3222 | // uses of the returned value. |
3223 | if (!CB.use_empty()) { |
3224 | ReturnInst *R = Returns[0]; |
3225 | if (&CB == R->getReturnValue()) |
3226 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3227 | else |
3228 | CB.replaceAllUsesWith(V: R->getReturnValue()); |
3229 | } |
3230 | // Since we are now done with the Call/Invoke, we can delete it. |
3231 | CB.eraseFromParent(); |
3232 | |
3233 | // Since we are now done with the return instruction, delete it also. |
3234 | Returns[0]->eraseFromParent(); |
3235 | |
3236 | if (MergeAttributes) |
3237 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
3238 | |
3239 | // We are now done with the inlining. |
3240 | return InlineResult::success(); |
3241 | } |
3242 | |
3243 | // Otherwise, we have the normal case, of more than one block to inline or |
3244 | // multiple return sites. |
3245 | |
3246 | // We want to clone the entire callee function into the hole between the |
3247 | // "starter" and "ender" blocks. How we accomplish this depends on whether |
3248 | // this is an invoke instruction or a call instruction. |
3249 | BasicBlock *AfterCallBB; |
3250 | BranchInst *CreatedBranchToNormalDest = nullptr; |
3251 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3252 | |
3253 | // Add an unconditional branch to make this look like the CallInst case... |
3254 | CreatedBranchToNormalDest = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
3255 | // We intend to replace this DebugLoc with another later. |
3256 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary()); |
3257 | |
3258 | // Split the basic block. This guarantees that no PHI nodes will have to be |
// updated due to new incoming edges, and makes the invoke case more
3260 | // symmetric to the call case. |
3261 | AfterCallBB = |
3262 | OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(), |
3263 | BBName: CalledFunc->getName() + ".exit" ); |
3264 | |
3265 | } else { // It's a call |
3266 | // If this is a call instruction, we need to split the basic block that |
3267 | // the call lives in. |
3268 | // |
3269 | AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(), |
3270 | BBName: CalledFunc->getName() + ".exit" ); |
3271 | } |
3272 | |
3273 | if (IFI.CallerBFI) { |
3274 | // Copy original BB's block frequency to AfterCallBB |
3275 | IFI.CallerBFI->setBlockFreq(BB: AfterCallBB, |
3276 | Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB)); |
3277 | } |
3278 | |
3279 | // Change the branch that used to go to AfterCallBB to branch to the first |
3280 | // basic block of the inlined function. |
3281 | // |
3282 | Instruction *Br = OrigBB->getTerminator(); |
3283 | assert(Br && Br->getOpcode() == Instruction::Br && |
3284 | "splitBasicBlock broken!" ); |
3285 | Br->setOperand(i: 0, Val: &*FirstNewBlock); |
3286 | |
3287 | // Now that the function is correct, make it a little bit nicer. In |
3288 | // particular, move the basic blocks inserted from the end of the function |
3289 | // into the space made by splitting the source basic block. |
3290 | Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock, |
3291 | FromEndIt: Caller->end()); |
3292 | |
3293 | // Handle all of the return instructions that we just cloned in, and eliminate |
3294 | // any users of the original call/invoke instruction. |
3295 | Type *RTy = CalledFunc->getReturnType(); |
3296 | |
3297 | PHINode *PHI = nullptr; |
3298 | if (Returns.size() > 1) { |
3299 | // The PHI node should go at the front of the new basic block to merge all |
3300 | // possible incoming values. |
3301 | if (!CB.use_empty()) { |
3302 | PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName()); |
3303 | PHI->insertBefore(InsertPos: AfterCallBB->begin()); |
3304 | // Anything that used the result of the function call should now use the |
3305 | // PHI node as their operand. |
3306 | CB.replaceAllUsesWith(V: PHI); |
3307 | } |
3308 | |
3309 | // Loop over all of the return instructions adding entries to the PHI node |
3310 | // as appropriate. |
3311 | if (PHI) { |
3312 | for (ReturnInst *RI : Returns) { |
3313 | assert(RI->getReturnValue()->getType() == PHI->getType() && |
3314 | "Ret value not consistent in function!" ); |
3315 | PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent()); |
3316 | } |
3317 | } |
3318 | |
3319 | // Add a branch to the merge points and remove return instructions. |
3320 | DebugLoc Loc; |
3321 | for (ReturnInst *RI : Returns) { |
3322 | BranchInst *BI = BranchInst::Create(IfTrue: AfterCallBB, InsertBefore: RI->getIterator()); |
3323 | Loc = RI->getDebugLoc(); |
3324 | BI->setDebugLoc(Loc); |
3325 | RI->eraseFromParent(); |
3326 | } |
3327 | // We need to set the debug location to *somewhere* inside the |
3328 | // inlined function. The line number may be nonsensical, but the |
3329 | // instruction will at least be associated with the right |
3330 | // function. |
3331 | if (CreatedBranchToNormalDest) |
3332 | CreatedBranchToNormalDest->setDebugLoc(Loc); |
3333 | } else if (!Returns.empty()) { |
3334 | // Otherwise, if there is exactly one return value, just replace anything |
3335 | // using the return value of the call with the computed value. |
3336 | if (!CB.use_empty()) { |
3337 | if (&CB == Returns[0]->getReturnValue()) |
3338 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3339 | else |
3340 | CB.replaceAllUsesWith(V: Returns[0]->getReturnValue()); |
3341 | } |
3342 | |
3343 | // Update PHI nodes that use the ReturnBB to use the AfterCallBB. |
3344 | BasicBlock *ReturnBB = Returns[0]->getParent(); |
3345 | ReturnBB->replaceAllUsesWith(V: AfterCallBB); |
3346 | |
3347 | // Splice the code from the return block into the block that it will return |
3348 | // to, which contains the code that was after the call. |
3349 | AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB); |
3350 | |
3351 | if (CreatedBranchToNormalDest) |
3352 | CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); |
3353 | |
// Delete the return instruction and then the now-empty ReturnBB.
3355 | Returns[0]->eraseFromParent(); |
3356 | ReturnBB->eraseFromParent(); |
3357 | } else if (!CB.use_empty()) { |
3358 | // In this case there are no returns to use, so there is no clear source |
3359 | // location for the "return". |
3360 | // FIXME: It may be correct to use the scope end line of the function here, |
3361 | // since this likely means we are falling out of the function. |
3362 | if (CreatedBranchToNormalDest) |
3363 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown()); |
3364 | // No returns, but something is using the return value of the call. Just |
3365 | // nuke the result. |
3366 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3367 | } |
3368 | |
3369 | // Since we are now done with the Call/Invoke, we can delete it. |
3370 | CB.eraseFromParent(); |
3371 | |
3372 | // If we inlined any musttail calls and the original return is now |
3373 | // unreachable, delete it. It can only contain a bitcast and ret. |
3374 | if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB)) |
3375 | AfterCallBB->eraseFromParent(); |
3376 | |
3377 | // We should always be able to fold the entry block of the function into the |
3378 | // single predecessor of the block... |
3379 | assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!" ); |
3380 | BasicBlock *CalleeEntry = cast<BranchInst>(Val: Br)->getSuccessor(i: 0); |
3381 | |
3382 | // Splice the code entry block into calling block, right before the |
3383 | // unconditional branch. |
3384 | CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes |
3385 | OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry); |
3386 | |
3387 | // Remove the unconditional branch. |
3388 | Br->eraseFromParent(); |
3389 | |
3390 | // Now we can remove the CalleeEntry block, which is now empty. |
3391 | CalleeEntry->eraseFromParent(); |
3392 | |
3393 | // If we inserted a phi node, check to see if it has a single value (e.g. all |
3394 | // the entries are the same or undef). If so, remove the PHI so it doesn't |
3395 | // block other optimizations. |
3396 | if (PHI) { |
3397 | AssumptionCache *AC = |
3398 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
3399 | auto &DL = Caller->getDataLayout(); |
3400 | if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) { |
3401 | PHI->replaceAllUsesWith(V); |
3402 | PHI->eraseFromParent(); |
3403 | } |
3404 | } |
3405 | |
3406 | if (MergeAttributes) |
3407 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
3408 | |
3409 | return InlineResult::success(); |
3410 | } |
3411 | |