| 1 | //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements inline cost analysis. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Analysis/InlineCost.h" |
| 14 | #include "llvm/ADT/STLExtras.h" |
| 15 | #include "llvm/ADT/SetVector.h" |
| 16 | #include "llvm/ADT/SmallPtrSet.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/ADT/Statistic.h" |
| 19 | #include "llvm/Analysis/AssumptionCache.h" |
| 20 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 21 | #include "llvm/Analysis/CodeMetrics.h" |
| 22 | #include "llvm/Analysis/ConstantFolding.h" |
| 23 | #include "llvm/Analysis/DomConditionCache.h" |
| 24 | #include "llvm/Analysis/EphemeralValuesCache.h" |
| 25 | #include "llvm/Analysis/InstructionSimplify.h" |
| 26 | #include "llvm/Analysis/LoopInfo.h" |
| 27 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 28 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 30 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 31 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 32 | #include "llvm/Analysis/ValueTracking.h" |
| 33 | #include "llvm/Config/llvm-config.h" |
| 34 | #include "llvm/IR/AssemblyAnnotationWriter.h" |
| 35 | #include "llvm/IR/CallingConv.h" |
| 36 | #include "llvm/IR/DataLayout.h" |
| 37 | #include "llvm/IR/Dominators.h" |
| 38 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
| 39 | #include "llvm/IR/GlobalAlias.h" |
| 40 | #include "llvm/IR/InstVisitor.h" |
| 41 | #include "llvm/IR/IntrinsicInst.h" |
| 42 | #include "llvm/IR/Operator.h" |
| 43 | #include "llvm/IR/PatternMatch.h" |
| 44 | #include "llvm/Support/CommandLine.h" |
| 45 | #include "llvm/Support/Debug.h" |
| 46 | #include "llvm/Support/FormattedStream.h" |
| 47 | #include "llvm/Support/raw_ostream.h" |
| 48 | #include <climits> |
| 49 | #include <limits> |
| 50 | #include <optional> |
| 51 | |
| 52 | using namespace llvm; |
| 53 | |
| 54 | #define DEBUG_TYPE "inline-cost" |
| 55 | |
| 56 | STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed" ); |
| 57 | |
| 58 | static cl::opt<int> |
| 59 | DefaultThreshold("inlinedefault-threshold" , cl::Hidden, cl::init(Val: 225), |
| 60 | cl::desc("Default amount of inlining to perform" )); |
| 61 | |
| 62 | // We introduce this option since there is a minor compile-time win by avoiding |
| 63 | // addition of TTI attributes (target-features in particular) to inline |
| 64 | // candidates when they are guaranteed to be the same as top level methods in |
| 65 | // some use cases. If we avoid adding the attribute, we need an option to avoid |
| 66 | // checking these attributes. |
| 67 | static cl::opt<bool> IgnoreTTIInlineCompatible( |
| 68 | "ignore-tti-inline-compatible" , cl::Hidden, cl::init(Val: false), |
| 69 | cl::desc("Ignore TTI attributes compatibility check between callee/caller " |
| 70 | "during inline cost calculation" )); |
| 71 | |
| 72 | static cl::opt<bool> PrintInstructionComments(
| 73 | "print-instruction-comments" , cl::Hidden, cl::init(Val: false), |
| 74 | cl::desc("Prints comments for instruction based on inline cost analysis" )); |
| 75 | |
| 76 | static cl::opt<int> InlineThreshold( |
| 77 | "inline-threshold" , cl::Hidden, cl::init(Val: 225), |
| 78 | cl::desc("Control the amount of inlining to perform (default = 225)" )); |
| 79 | |
| 80 | static cl::opt<int> HintThreshold( |
| 81 | "inlinehint-threshold" , cl::Hidden, cl::init(Val: 325), |
| 82 | cl::desc("Threshold for inlining functions with inline hint" )); |
| 83 | |
| 84 | static cl::opt<int> |
| 85 | ColdCallSiteThreshold("inline-cold-callsite-threshold" , cl::Hidden, |
| 86 | cl::init(Val: 45), |
| 87 | cl::desc("Threshold for inlining cold callsites" )); |
| 88 | |
| 89 | static cl::opt<bool> InlineEnableCostBenefitAnalysis( |
| 90 | "inline-enable-cost-benefit-analysis" , cl::Hidden, cl::init(Val: false), |
| 91 | cl::desc("Enable the cost-benefit analysis for the inliner" )); |
| 92 | |
| 93 | // InlineSavingsMultiplier overrides per TTI multipliers iff it is |
| 94 | // specified explicitly in command line options. This option is exposed |
| 95 | // for tuning and testing. |
| 96 | static cl::opt<int> InlineSavingsMultiplier( |
| 97 | "inline-savings-multiplier" , cl::Hidden, cl::init(Val: 8), |
| 98 | cl::desc("Multiplier to multiply cycle savings by during inlining" )); |
| 99 | |
| 100 | // InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is |
| 101 | // specified explicitly in command line options. This option is exposed |
| 102 | // for tuning and testing. |
| 103 | static cl::opt<int> InlineSavingsProfitableMultiplier( |
| 104 | "inline-savings-profitable-multiplier" , cl::Hidden, cl::init(Val: 4), |
| 105 | cl::desc("A multiplier on top of cycle savings to decide whether the " |
| 106 | "savings won't justify the cost" )); |
| 107 | |
| 108 | static cl::opt<int> |
| 109 | InlineSizeAllowance("inline-size-allowance" , cl::Hidden, cl::init(Val: 100), |
| 110 |                         cl::desc("The maximum size of a callee that gets "
| 111 | "inlined without sufficient cycle savings" )); |
| 112 | |
| 113 | // We introduce this threshold to help performance of instrumentation based |
| 114 | // PGO before we actually hook up inliner with analysis passes such as BPI and |
| 115 | // BFI. |
| 116 | static cl::opt<int> ColdThreshold( |
| 117 | "inlinecold-threshold" , cl::Hidden, cl::init(Val: 45), |
| 118 | cl::desc("Threshold for inlining functions with cold attribute" )); |
| 119 | |
| 120 | static cl::opt<int> |
| 121 | HotCallSiteThreshold("hot-callsite-threshold" , cl::Hidden, cl::init(Val: 3000), |
| 122 | cl::desc("Threshold for hot callsites " )); |
| 123 | |
| 124 | static cl::opt<int> LocallyHotCallSiteThreshold( |
| 125 | "locally-hot-callsite-threshold" , cl::Hidden, cl::init(Val: 525), |
| 126 | cl::desc("Threshold for locally hot callsites " )); |
| 127 | |
| 128 | static cl::opt<int> ColdCallSiteRelFreq( |
| 129 | "cold-callsite-rel-freq" , cl::Hidden, cl::init(Val: 2), |
| 130 | cl::desc("Maximum block frequency, expressed as a percentage of caller's " |
| 131 | "entry frequency, for a callsite to be cold in the absence of " |
| 132 | "profile information." )); |
| 133 | |
| 134 | static cl::opt<uint64_t> HotCallSiteRelFreq( |
| 135 | "hot-callsite-rel-freq" , cl::Hidden, cl::init(Val: 60), |
| 136 | cl::desc("Minimum block frequency, expressed as a multiple of caller's " |
| 137 | "entry frequency, for a callsite to be hot in the absence of " |
| 138 | "profile information." )); |
| 139 | |
| 140 | static cl::opt<int> |
| 141 | InstrCost("inline-instr-cost" , cl::Hidden, cl::init(Val: 5), |
| 142 | cl::desc("Cost of a single instruction when inlining" )); |
| 143 | |
| 144 | static cl::opt<int> |
| 145 | MemAccessCost("inline-memaccess-cost" , cl::Hidden, cl::init(Val: 0), |
| 146 | cl::desc("Cost of load/store instruction when inlining" )); |
| 147 | |
| 148 | static cl::opt<int> CallPenalty( |
| 149 | "inline-call-penalty" , cl::Hidden, cl::init(Val: 25), |
| 150 | cl::desc("Call penalty that is applied per callsite when inlining" )); |
| 151 | |
| 152 | static cl::opt<size_t> |
| 153 | StackSizeThreshold("inline-max-stacksize" , cl::Hidden, |
| 154 | cl::init(Val: std::numeric_limits<size_t>::max()), |
| 155 | cl::desc("Do not inline functions with a stack size " |
| 156 | "that exceeds the specified limit" )); |
| 157 | |
| 158 | static cl::opt<size_t> RecurStackSizeThreshold( |
| 159 | "recursive-inline-max-stacksize" , cl::Hidden, |
| 160 | cl::init(Val: InlineConstants::TotalAllocaSizeRecursiveCaller), |
| 161 | cl::desc("Do not inline recursive functions with a stack " |
| 162 | "size that exceeds the specified limit" )); |
| 163 | |
| 164 | static cl::opt<bool> OptComputeFullInlineCost( |
| 165 | "inline-cost-full" , cl::Hidden, |
| 166 | cl::desc("Compute the full inline cost of a call site even when the cost " |
| 167 | "exceeds the threshold." )); |
| 168 | |
| 169 | static cl::opt<bool> InlineCallerSupersetNoBuiltin( |
| 170 | "inline-caller-superset-nobuiltin" , cl::Hidden, cl::init(Val: true), |
| 171 | cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " |
| 172 | "attributes." )); |
| 173 | |
| 174 | static cl::opt<bool> DisableGEPConstOperand( |
| 175 | "disable-gep-const-evaluation" , cl::Hidden, cl::init(Val: false), |
| 176 | cl::desc("Disables evaluation of GetElementPtr with constant operands" )); |
| 177 | |
| 178 | namespace llvm { |
| 179 | std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) { |
| 180 | if (Attr.isValid()) { |
| 181 | int AttrValue = 0; |
| 182 | if (!Attr.getValueAsString().getAsInteger(Radix: 10, Result&: AttrValue)) |
| 183 | return AttrValue; |
| 184 | } |
| 185 | return std::nullopt; |
| 186 | } |
| 187 | |
| 188 | std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { |
| 189 | return getStringFnAttrAsInt(Attr: CB.getFnAttr(Kind: AttrKind)); |
| 190 | } |
| 191 | |
| 192 | std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) { |
| 193 | return getStringFnAttrAsInt(Attr: F->getFnAttribute(Kind: AttrKind)); |
| 194 | } |
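|  | 
|  | // For example, a call site carrying the string attribute
|  | // "function-inline-cost"="150" yields 150 from the helpers above, while a
|  | // missing or non-integer attribute yields std::nullopt.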
| 195 | |
| 196 | namespace InlineConstants { |
| 197 | int getInstrCost() { return InstrCost; } |
| 198 | |
| 199 | } // namespace InlineConstants |
| 200 | |
| 201 | } // namespace llvm |
| 202 | |
| 203 | namespace { |
| 204 | class InlineCostCallAnalyzer; |
| 205 | |
| 206 | // This struct is used to store information about inline cost of a |
| 207 | // particular instruction |
| 208 | struct InstructionCostDetail { |
| 209 | int CostBefore = 0; |
| 210 | int CostAfter = 0; |
| 211 | int ThresholdBefore = 0; |
| 212 | int ThresholdAfter = 0; |
| 213 | |
| 214 | int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; } |
| 215 | |
| 216 | int getCostDelta() const { return CostAfter - CostBefore; } |
| 217 | |
| 218 | bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } |
| 219 | }; |
| 220 | |
| 221 | class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { |
| 222 | private: |
| 223 | InlineCostCallAnalyzer *const ICCA; |
| 224 | |
| 225 | public: |
| 226 | InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} |
| 227 | void emitInstructionAnnot(const Instruction *I, |
| 228 | formatted_raw_ostream &OS) override; |
| 229 | }; |
| 230 | |
| 231 | /// Carry out call site analysis, in order to evaluate inlinability. |
| 232 | /// NOTE: the type is currently used as an implementation detail of functions
| 233 | /// such as llvm::getInlineCost. Note the function_ref constructor parameters -
| 234 | /// the expectation is that they come from the outer scope, from the wrapper
| 235 | /// functions. If we want to support constructing CallAnalyzer objects where |
| 236 | /// lambdas are provided inline at construction, or where the object needs to |
| 237 | /// otherwise survive past the scope of the provided functions, we need to |
| 238 | /// revisit the argument types. |
| 239 | class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { |
| 240 | typedef InstVisitor<CallAnalyzer, bool> Base; |
| 241 | friend class InstVisitor<CallAnalyzer, bool>; |
| 242 | |
| 243 | protected: |
| 244 | virtual ~CallAnalyzer() = default; |
| 245 | /// The TargetTransformInfo available for this compilation. |
| 246 | const TargetTransformInfo &TTI; |
| 247 | |
| 248 | /// Getter for the cache of @llvm.assume intrinsics. |
| 249 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache; |
| 250 | |
| 251 | /// Getter for BlockFrequencyInfo |
| 252 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI; |
| 253 | |
| 254 | /// Getter for TargetLibraryInfo |
| 255 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI; |
| 256 | |
| 257 | /// Profile summary information. |
| 258 | ProfileSummaryInfo *PSI; |
| 259 | |
| 260 | /// The called function. |
| 261 | Function &F; |
| 262 | |
| 263 | // Cache the DataLayout since we use it a lot. |
| 264 | const DataLayout &DL; |
| 265 | |
| 266 | /// The OptimizationRemarkEmitter available for this compilation. |
| 267 | OptimizationRemarkEmitter *ORE; |
| 268 | |
| 269 | /// The candidate callsite being analyzed. Please do not use this to do |
| 270 | /// analysis in the caller function; we want the inline cost query to be |
| 271 | /// easily cacheable. Instead, use the cover function paramHasAttr. |
| 272 | CallBase &CandidateCall; |
| 273 | |
| 274 | /// Getter for the cache of ephemeral values. |
| 275 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr; |
| 276 | |
| 277 | /// Extension points for handling callsite features. |
| 278 |   /// Called before a basic block is analyzed.
| 279 | virtual void onBlockStart(const BasicBlock *BB) {} |
| 280 | |
| 281 | /// Called after a basic block was analyzed. |
| 282 | virtual void onBlockAnalyzed(const BasicBlock *BB) {} |
| 283 | |
| 284 | /// Called before an instruction was analyzed |
| 285 | virtual void onInstructionAnalysisStart(const Instruction *I) {} |
| 286 | |
| 287 | /// Called after an instruction was analyzed |
| 288 | virtual void onInstructionAnalysisFinish(const Instruction *I) {} |
| 289 | |
| 290 | /// Called at the end of the analysis of the callsite. Return the outcome of |
| 291 | /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or |
| 292 | /// the reason it can't. |
| 293 | virtual InlineResult finalizeAnalysis() { return InlineResult::success(); } |
| 294 |   /// Called when we're about to start processing a basic block, and every time
| 295 |   /// we are done processing an instruction. Return true if there is no point in
| 296 |   /// continuing the analysis (e.g. we've already determined the call site is
| 297 |   /// too expensive to inline).
| 298 | virtual bool shouldStop() { return false; } |
| 299 | |
| 300 |   /// Called before the analysis of the callee body starts (with callsite
| 301 |   /// contexts propagated). It checks callsite-specific information. Return a
| 302 |   /// reason the analysis can't continue if that's the case, or success if it
| 303 |   /// may continue.
| 304 | virtual InlineResult onAnalysisStart() { return InlineResult::success(); } |
| 305 | /// Called if the analysis engine decides SROA cannot be done for the given |
| 306 | /// alloca. |
| 307 | virtual void onDisableSROA(AllocaInst *Arg) {} |
| 308 | |
| 309 |   /// Called when the analysis engine determines load elimination won't happen.
| 310 | virtual void onDisableLoadElimination() {} |
| 311 | |
| 312 | /// Called when we visit a CallBase, before the analysis starts. Return false |
| 313 | /// to stop further processing of the instruction. |
| 314 | virtual bool onCallBaseVisitStart(CallBase &Call) { return true; } |
| 315 | |
| 316 | /// Called to account for a call. |
| 317 | virtual void onCallPenalty() {} |
| 318 | |
| 319 | /// Called to account for a load or store. |
| 320 |   virtual void onMemAccess() {}
| 321 | |
| 322 | /// Called to account for the expectation the inlining would result in a load |
| 323 | /// elimination. |
| 324 | virtual void onLoadEliminationOpportunity() {} |
| 325 | |
| 326 | /// Called to account for the cost of argument setup for the Call in the |
| 327 | /// callee's body (not the callsite currently under analysis). |
| 328 | virtual void onCallArgumentSetup(const CallBase &Call) {} |
| 329 | |
| 330 | /// Called to account for a load relative intrinsic. |
| 331 | virtual void onLoadRelativeIntrinsic() {} |
| 332 | |
| 333 | /// Called to account for a lowered call. |
| 334 | virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { |
| 335 | } |
| 336 | |
| 337 |   /// Account for a jump table of the given size. Return false to stop further
| 338 |   /// processing of the switch instruction.
| 339 | virtual bool onJumpTable(unsigned JumpTableSize) { return true; } |
| 340 | |
| 341 | /// Account for a case cluster of given size. Return false to stop further |
| 342 | /// processing of the instruction. |
| 343 | virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } |
| 344 | |
| 345 | /// Called at the end of processing a switch instruction, with the given |
| 346 | /// number of case clusters. |
| 347 | virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 348 | bool DefaultDestUnreachable) {} |
| 349 | |
| 350 | /// Called to account for any other instruction not specifically accounted |
| 351 | /// for. |
| 352 | virtual void onMissedSimplification() {} |
| 353 | |
| 354 | /// Start accounting potential benefits due to SROA for the given alloca. |
| 355 | virtual void onInitializeSROAArg(AllocaInst *Arg) {} |
| 356 | |
| 357 | /// Account SROA savings for the AllocaInst value. |
| 358 | virtual void onAggregateSROAUse(AllocaInst *V) {} |
| 359 | |
| 360 | bool handleSROA(Value *V, bool DoNotDisable) { |
| 361 | // Check for SROA candidates in comparisons. |
| 362 | if (auto *SROAArg = getSROAArgForValueOrNull(V)) { |
| 363 | if (DoNotDisable) { |
| 364 | onAggregateSROAUse(V: SROAArg); |
| 365 | return true; |
| 366 | } |
| 367 | disableSROAForArg(SROAArg); |
| 368 | } |
| 369 | return false; |
| 370 | } |
| 371 | |
| 372 | bool IsCallerRecursive = false; |
| 373 | bool IsRecursiveCall = false; |
| 374 | bool ExposesReturnsTwice = false; |
| 375 | bool HasDynamicAlloca = false; |
| 376 | bool ContainsNoDuplicateCall = false; |
| 377 | bool HasReturn = false; |
| 378 | bool HasIndirectBr = false; |
| 379 | bool HasUninlineableIntrinsic = false; |
| 380 | bool InitsVargArgs = false; |
| 381 | |
| 382 | /// Number of bytes allocated statically by the callee. |
| 383 | uint64_t AllocatedSize = 0; |
| 384 | unsigned NumInstructions = 0; |
| 385 | unsigned NumVectorInstructions = 0; |
| 386 | |
| 387 | /// While we walk the potentially-inlined instructions, we build up and |
| 388 | /// maintain a mapping of simplified values specific to this callsite. The |
| 389 | /// idea is to propagate any special information we have about arguments to |
| 390 | /// this call through the inlinable section of the function, and account for |
| 391 | /// likely simplifications post-inlining. The most important aspect we track |
| 392 | /// is CFG altering simplifications -- when we prove a basic block dead, that |
| 393 | /// can cause dramatic shifts in the cost of inlining a function. |
| 394 | /// Note: The simplified Value may be owned by the caller function. |
| 395 | DenseMap<Value *, Value *> SimplifiedValues; |
| 396 | |
| 397 | /// Keep track of the values which map back (through function arguments) to |
| 398 | /// allocas on the caller stack which could be simplified through SROA. |
| 399 | DenseMap<Value *, AllocaInst *> SROAArgValues; |
| 400 | |
| 401 | /// Keep track of Allocas for which we believe we may get SROA optimization. |
| 402 | DenseSet<AllocaInst *> EnabledSROAAllocas; |
| 403 | |
| 404 | /// Keep track of values which map to a pointer base and constant offset. |
| 405 | DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; |
| 406 | |
| 407 | /// Keep track of dead blocks due to the constant arguments. |
| 408 | SmallPtrSet<BasicBlock *, 16> DeadBlocks; |
| 409 | |
| 410 | /// The mapping of the blocks to their known unique successors due to the |
| 411 | /// constant arguments. |
| 412 | DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors; |
| 413 | |
| 414 | /// Model the elimination of repeated loads that is expected to happen |
| 415 | /// whenever we simplify away the stores that would otherwise cause them to be |
| 416 | /// loads. |
| 417 | bool EnableLoadElimination = true; |
| 418 | |
| 419 |   /// Whether we allow inlining of recursive calls.
| 420 | bool AllowRecursiveCall = false; |
| 421 | |
| 422 | SmallPtrSet<Value *, 16> LoadAddrSet; |
| 423 | |
| 424 | AllocaInst *getSROAArgForValueOrNull(Value *V) const { |
| 425 | auto It = SROAArgValues.find(Val: V); |
| 426 | if (It == SROAArgValues.end() || EnabledSROAAllocas.count(V: It->second) == 0) |
| 427 | return nullptr; |
| 428 | return It->second; |
| 429 | } |
| 430 | |
| 431 | /// Use a value in its given form directly if possible, otherwise try looking |
| 432 | /// for it in SimplifiedValues. |
| 433 | template <typename T> T *getDirectOrSimplifiedValue(Value *V) const { |
| 434 | if (auto *Direct = dyn_cast<T>(V)) |
| 435 | return Direct; |
| 436 | return getSimplifiedValue<T>(V); |
| 437 | } |
| 438 | |
| 439 | // Custom simplification helper routines. |
| 440 | bool isAllocaDerivedArg(Value *V); |
| 441 | void disableSROAForArg(AllocaInst *SROAArg); |
| 442 | void disableSROA(Value *V); |
| 443 | void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); |
| 444 | void disableLoadElimination(); |
| 445 | bool isGEPFree(GetElementPtrInst &GEP); |
| 446 | bool canFoldInboundsGEP(GetElementPtrInst &I); |
| 447 | bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); |
| 448 | bool simplifyCallSite(Function *F, CallBase &Call); |
| 449 | bool simplifyCmpInstForRecCall(CmpInst &Cmp); |
| 450 | bool simplifyInstruction(Instruction &I); |
| 451 | bool simplifyIntrinsicCallIsConstant(CallBase &CB); |
| 452 | bool simplifyIntrinsicCallObjectSize(CallBase &CB); |
| 453 | ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); |
| 454 | bool isLoweredToCall(Function *F, CallBase &Call); |
| 455 | |
| 456 | /// Return true if the given argument to the function being considered for |
| 457 | /// inlining has the given attribute set either at the call site or the |
| 458 | /// function declaration. Primarily used to inspect call site specific |
| 459 | /// attributes since these can be more precise than the ones on the callee |
| 460 | /// itself. |
| 461 | bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); |
| 462 | |
| 463 | /// Return true if the given value is known non null within the callee if |
| 464 | /// inlined through this particular callsite. |
| 465 | bool isKnownNonNullInCallee(Value *V); |
| 466 | |
| 467 | /// Return true if size growth is allowed when inlining the callee at \p Call. |
| 468 | bool allowSizeGrowth(CallBase &Call); |
| 469 | |
| 470 | // Custom analysis routines. |
| 471 | InlineResult analyzeBlock(BasicBlock *BB, |
| 472 | const SmallPtrSetImpl<const Value *> &EphValues); |
| 473 | |
| 474 | // Disable several entry points to the visitor so we don't accidentally use |
| 475 | // them by declaring but not defining them here. |
| 476 | void visit(Module *); |
| 477 | void visit(Module &); |
| 478 | void visit(Function *); |
| 479 | void visit(Function &); |
| 480 | void visit(BasicBlock *); |
| 481 | void visit(BasicBlock &); |
| 482 | |
| 483 | // Provide base case for our instruction visit. |
| 484 | bool visitInstruction(Instruction &I); |
| 485 | |
| 486 | // Our visit overrides. |
| 487 | bool visitAlloca(AllocaInst &I); |
| 488 | bool visitPHI(PHINode &I); |
| 489 | bool visitGetElementPtr(GetElementPtrInst &I); |
| 490 | bool visitBitCast(BitCastInst &I); |
| 491 | bool visitPtrToInt(PtrToIntInst &I); |
| 492 | bool visitIntToPtr(IntToPtrInst &I); |
| 493 | bool visitCastInst(CastInst &I); |
| 494 | bool visitCmpInst(CmpInst &I); |
| 495 | bool visitSub(BinaryOperator &I); |
| 496 | bool visitBinaryOperator(BinaryOperator &I); |
| 497 | bool visitFNeg(UnaryOperator &I); |
| 498 | bool visitLoad(LoadInst &I); |
| 499 | bool visitStore(StoreInst &I); |
| 500 | bool visitExtractValue(ExtractValueInst &I); |
| 501 | bool visitInsertValue(InsertValueInst &I); |
| 502 | bool visitCallBase(CallBase &Call); |
| 503 | bool visitReturnInst(ReturnInst &RI); |
| 504 | bool visitBranchInst(BranchInst &BI); |
| 505 | bool visitSelectInst(SelectInst &SI); |
| 506 | bool visitSwitchInst(SwitchInst &SI); |
| 507 | bool visitIndirectBrInst(IndirectBrInst &IBI); |
| 508 | bool visitResumeInst(ResumeInst &RI); |
| 509 | bool visitCleanupReturnInst(CleanupReturnInst &RI); |
| 510 | bool visitCatchReturnInst(CatchReturnInst &RI); |
| 511 | bool visitUnreachableInst(UnreachableInst &I); |
| 512 | |
| 513 | public: |
| 514 | CallAnalyzer( |
| 515 | Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, |
| 516 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 517 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, |
| 518 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, |
| 519 | ProfileSummaryInfo *PSI = nullptr, |
| 520 | OptimizationRemarkEmitter *ORE = nullptr, |
| 521 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = |
| 522 | nullptr) |
| 523 | : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), |
| 524 | GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), |
| 525 | CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {} |
| 526 | |
| 527 | InlineResult analyze(); |
| 528 | |
| 529 | /// Lookup simplified Value. May return a value owned by the caller. |
| 530 | Value *getSimplifiedValueUnchecked(Value *V) const { |
| 531 | return SimplifiedValues.lookup(Val: V); |
| 532 | } |
| 533 | |
| 534 | /// Lookup simplified Value, but return nullptr if the simplified value is |
| 535 | /// owned by the caller. |
| 536 | template <typename T> T *getSimplifiedValue(Value *V) const { |
| 537 | Value *SimpleV = SimplifiedValues.lookup(Val: V); |
| 538 | if (!SimpleV) |
| 539 | return nullptr; |
| 540 | |
| 541 |     // Skip the ownership checks below if we know T is a Constant. This has a
| 542 |     // small, but measurable impact on compile-time.
| 543 | if constexpr (std::is_base_of_v<Constant, T>) |
| 544 | return dyn_cast<T>(SimpleV); |
| 545 | |
| 546 | // Make sure the simplified Value is owned by this function |
| 547 | if (auto *I = dyn_cast<Instruction>(Val: SimpleV)) { |
| 548 | if (I->getFunction() != &F) |
| 549 | return nullptr; |
| 550 | } else if (auto *Arg = dyn_cast<Argument>(Val: SimpleV)) { |
| 551 | if (Arg->getParent() != &F) |
| 552 | return nullptr; |
| 553 | } else if (!isa<Constant>(Val: SimpleV)) |
| 554 | return nullptr; |
| 555 | return dyn_cast<T>(SimpleV); |
| 556 | } |
| 557 | |
| 558 | // Keep a bunch of stats about the cost savings found so we can print them |
| 559 | // out when debugging. |
| 560 | unsigned NumConstantArgs = 0; |
| 561 | unsigned NumConstantOffsetPtrArgs = 0; |
| 562 | unsigned NumAllocaArgs = 0; |
| 563 | unsigned NumConstantPtrCmps = 0; |
| 564 | unsigned NumConstantPtrDiffs = 0; |
| 565 | unsigned NumInstructionsSimplified = 0; |
| 566 | |
| 567 | void dump(); |
| 568 | }; |
| 569 | |
| 570 | // When forming a binary search tree for a switch, we need the number of nodes
| 571 | // in the tree, which equals the number of comparisons once the switch is
| 572 | // lowered. For a given number of clusters, n, we can define a recursive
| 573 | // function, f(n), to find the number of nodes in the tree. The recursion is:
| 574 | //    f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
| 575 | // and f(n) = n, when n <= 3.
| 576 | // This leads to a binary tree where each leaf is either f(2) or f(3) when
| 577 | // n > 3. So, the number of comparisons from the leaves should be n, while the
| 578 | // number of comparisons from the non-leaf nodes should be:
| 579 | //   2^(log2(n) - 1) - 1
| 580 | //   = 2^log2(n) * 2^-1 - 1
| 581 | //   = n / 2 - 1.
| 582 | // Considering comparisons from leaf and non-leaf nodes, we can estimate the
| 583 | // number of comparisons in a simple closed form:
| 584 | //   n + n / 2 - 1 = n * 3 / 2 - 1
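|  | // For example, with 8 case clusters: f(8) = 1 + f(4) + f(4) = 1 + 5 + 5 = 11,
|  | // which matches the closed form 8 * 3 / 2 - 1 = 11 expected comparisons.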
| 585 | int64_t getExpectedNumberOfCompare(int NumCaseCluster) { |
| 586 | return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1; |
| 587 | } |
| 588 | |
| 589 | /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note |
| 590 | /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer |
| 591 | class InlineCostCallAnalyzer final : public CallAnalyzer { |
| 592 | const bool ComputeFullInlineCost; |
| 593 | int LoadEliminationCost = 0; |
| 594 | /// Bonus to be applied when percentage of vector instructions in callee is |
| 595 | /// high (see more details in updateThreshold). |
| 596 | int VectorBonus = 0; |
| 597 | /// Bonus to be applied when the callee has only one reachable basic block. |
| 598 | int SingleBBBonus = 0; |
| 599 | |
| 600 | /// Tunable parameters that control the analysis. |
| 601 | const InlineParams &Params; |
| 602 | |
| 603 | // This DenseMap stores the delta change in cost and threshold after |
| 604 | // accounting for the given instruction. The map is filled only with the |
| 605 | // flag PrintInstructionComments on. |
| 606 | DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap; |
| 607 | |
| 608 | /// Upper bound for the inlining cost. Bonuses are being applied to account |
| 609 | /// for speculative "expected profit" of the inlining decision. |
| 610 | int Threshold = 0; |
| 611 | |
| 612 | /// The amount of StaticBonus applied. |
| 613 | int StaticBonusApplied = 0; |
| 614 | |
| 615 |   /// Attempt to evaluate indirect calls in order to boost the inline cost.
| 616 | const bool BoostIndirectCalls; |
| 617 | |
| 618 | /// Ignore the threshold when finalizing analysis. |
| 619 | const bool IgnoreThreshold; |
| 620 | |
| 621 | // True if the cost-benefit-analysis-based inliner is enabled. |
| 622 | const bool CostBenefitAnalysisEnabled; |
| 623 | |
| 624 | /// Inlining cost measured in abstract units, accounts for all the |
| 625 | /// instructions expected to be executed for a given function invocation. |
| 626 | /// Instructions that are statically proven to be dead based on call-site |
| 627 | /// arguments are not counted here. |
| 628 | int Cost = 0; |
| 629 | |
| 630 | // The cumulative cost at the beginning of the basic block being analyzed. At |
| 631 | // the end of analyzing each basic block, "Cost - CostAtBBStart" represents |
| 632 | // the size of that basic block. |
| 633 | int CostAtBBStart = 0; |
| 634 | |
| 635 | // The static size of live but cold basic blocks. This is "static" in the |
| 636 | // sense that it's not weighted by profile counts at all. |
| 637 | int ColdSize = 0; |
| 638 | |
| 639 | // Whether inlining is decided by cost-threshold analysis. |
| 640 | bool DecidedByCostThreshold = false; |
| 641 | |
| 642 | // Whether inlining is decided by cost-benefit analysis. |
| 643 | bool DecidedByCostBenefit = false; |
| 644 | |
| 645 | // The cost-benefit pair computed by cost-benefit analysis. |
| 646 | std::optional<CostBenefitPair> CostBenefit; |
| 647 | |
| 648 | bool SingleBB = true; |
| 649 | |
| 650 | unsigned SROACostSavings = 0; |
| 651 | unsigned SROACostSavingsLost = 0; |
| 652 | |
| 653 | /// The mapping of caller Alloca values to their accumulated cost savings. If |
| 654 | /// we have to disable SROA for one of the allocas, this tells us how much |
| 655 | /// cost must be added. |
| 656 | DenseMap<AllocaInst *, int> SROAArgCosts; |
| 657 | |
| 658 | /// Return true if \p Call is a cold callsite. |
| 659 | bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); |
| 660 | |
| 661 | /// Update Threshold based on callsite properties such as callee |
| 662 | /// attributes and callee hotness for PGO builds. The Callee is explicitly |
| 663 | /// passed to support analyzing indirect calls whose target is inferred by |
| 664 | /// analysis. |
| 665 | void updateThreshold(CallBase &Call, Function &Callee); |
| 666 | /// Return a higher threshold if \p Call is a hot callsite. |
| 667 | std::optional<int> getHotCallSiteThreshold(CallBase &Call, |
| 668 | BlockFrequencyInfo *CallerBFI); |
| 669 | |
| 670 | /// Handle a capped 'int' increment for Cost. |
| 671 | void addCost(int64_t Inc) { |
| 672 | Inc = std::clamp<int64_t>(val: Inc, INT_MIN, INT_MAX); |
| 673 | Cost = std::clamp<int64_t>(val: Inc + Cost, INT_MIN, INT_MAX); |
| 674 | } |
| 675 | |
| 676 | void onDisableSROA(AllocaInst *Arg) override { |
| 677 | auto CostIt = SROAArgCosts.find(Val: Arg); |
| 678 | if (CostIt == SROAArgCosts.end()) |
| 679 | return; |
| 680 | addCost(Inc: CostIt->second); |
| 681 | SROACostSavings -= CostIt->second; |
| 682 | SROACostSavingsLost += CostIt->second; |
| 683 | SROAArgCosts.erase(I: CostIt); |
| 684 | } |
| 685 | |
| 686 | void onDisableLoadElimination() override { |
| 687 | addCost(Inc: LoadEliminationCost); |
| 688 | LoadEliminationCost = 0; |
| 689 | } |
| 690 | |
| 691 | bool onCallBaseVisitStart(CallBase &Call) override { |
| 692 | if (std::optional<int> AttrCallThresholdBonus = |
| 693 | getStringFnAttrAsInt(CB&: Call, AttrKind: "call-threshold-bonus" )) |
| 694 | Threshold += *AttrCallThresholdBonus; |
| 695 | |
| 696 | if (std::optional<int> AttrCallCost = |
| 697 | getStringFnAttrAsInt(CB&: Call, AttrKind: "call-inline-cost" )) { |
| 698 | addCost(Inc: *AttrCallCost); |
| 699 | // Prevent further processing of the call since we want to override its |
| 700 | // inline cost, not just add to it. |
| 701 | return false; |
| 702 | } |
| 703 | return true; |
| 704 | } |
| 705 | |
| 706 | void onCallPenalty() override { addCost(Inc: CallPenalty); } |
| 707 | |
| 708 | void onMemAccess() override { addCost(Inc: MemAccessCost); } |
| 709 | |
| 710 | void onCallArgumentSetup(const CallBase &Call) override { |
| 711 | // Pay the price of the argument setup. We account for the average 1 |
| 712 | // instruction per call argument setup here. |
| 713 | addCost(Inc: Call.arg_size() * InstrCost); |
| 714 | } |
| 715 | void onLoadRelativeIntrinsic() override { |
| 716 | // This is normally lowered to 4 LLVM instructions. |
| 717 | addCost(Inc: 3 * InstrCost); |
| 718 | } |
| 719 | void onLoweredCall(Function *F, CallBase &Call, |
| 720 | bool IsIndirectCall) override { |
| 721 | // We account for the average 1 instruction per call argument setup here. |
| 722 | addCost(Inc: Call.arg_size() * InstrCost); |
| 723 | |
| 724 | // If we have a constant that we are calling as a function, we can peer |
| 725 | // through it and see the function target. This happens not infrequently |
| 726 | // during devirtualization and so we want to give it a hefty bonus for |
| 727 | // inlining, but cap that bonus in the event that inlining wouldn't pan out. |
| 728 | // Pretend to inline the function, with a custom threshold. |
| 729 | if (IsIndirectCall && BoostIndirectCalls) { |
| 730 | auto IndirectCallParams = Params; |
| 731 | IndirectCallParams.DefaultThreshold = |
| 732 | InlineConstants::IndirectCallThreshold; |
| 733 | /// FIXME: if InlineCostCallAnalyzer is derived from, this may need |
| 734 | /// to instantiate the derived class. |
| 735 | InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, |
| 736 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 737 | false); |
| 738 | if (CA.analyze().isSuccess()) { |
| 739 | // We were able to inline the indirect call! Subtract the cost from the |
| 740 | // threshold to get the bonus we want to apply, but don't go below zero. |
| 741 | Cost -= std::max(a: 0, b: CA.getThreshold() - CA.getCost()); |
| 742 | } |
| 743 | } else |
| 744 | // Otherwise simply add the cost for merely making the call. |
| 745 | addCost(Inc: TTI.getInlineCallPenalty(F: CandidateCall.getCaller(), Call, |
| 746 | DefaultCallPenalty: CallPenalty)); |
| 747 | } |
| 748 | |
| 749 | void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 750 | bool DefaultDestUnreachable) override { |
| 751 | // If suitable for a jump table, consider the cost for the table size and |
| 752 | // branch to destination. |
| 753 | // Maximum valid cost increased in this function. |
| 754 | if (JumpTableSize) { |
| 755 | // Suppose a default branch includes one compare and one conditional |
| 756 | // branch if it's reachable. |
| 757 | if (!DefaultDestUnreachable) |
| 758 | addCost(Inc: 2 * InstrCost); |
| 759 | // Suppose a jump table requires one load and one jump instruction. |
| 760 | int64_t JTCost = |
| 761 | static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost; |
| 762 | addCost(Inc: JTCost); |
| 763 | return; |
| 764 | } |
| 765 | |
| 766 | if (NumCaseCluster <= 3) { |
| 767 | // Suppose a comparison includes one compare and one conditional branch. |
| 768 |       // We can save one compare and one conditional branch if the default
| 769 |       // destination is unreachable.
| 770 | addCost(Inc: (NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost); |
| 771 | return; |
| 772 | } |
| 773 | |
| 774 | int64_t ExpectedNumberOfCompare = |
| 775 | getExpectedNumberOfCompare(NumCaseCluster); |
| 776 | int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost; |
| 777 | |
| 778 | addCost(Inc: SwitchCost); |
| 779 | } |
| 780 | void onMissedSimplification() override { addCost(Inc: InstrCost); } |
| 781 | |
| 782 | void onInitializeSROAArg(AllocaInst *Arg) override { |
| 783 | assert(Arg != nullptr && |
| 784 | "Should not initialize SROA costs for null value." ); |
| 785 | auto SROAArgCost = TTI.getCallerAllocaCost(CB: &CandidateCall, AI: Arg); |
| 786 | SROACostSavings += SROAArgCost; |
| 787 | SROAArgCosts[Arg] = SROAArgCost; |
| 788 | } |
| 789 | |
| 790 | void onAggregateSROAUse(AllocaInst *SROAArg) override { |
| 791 | auto CostIt = SROAArgCosts.find(Val: SROAArg); |
| 792 | assert(CostIt != SROAArgCosts.end() && |
| 793 | "expected this argument to have a cost" ); |
| 794 | CostIt->second += InstrCost; |
| 795 | SROACostSavings += InstrCost; |
| 796 | } |
| 797 | |
| 798 | void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } |
| 799 | |
| 800 | void onBlockAnalyzed(const BasicBlock *BB) override { |
| 801 | if (CostBenefitAnalysisEnabled) { |
| 802 | // Keep track of the static size of live but cold basic blocks. For now, |
| 803 | // we define a cold basic block to be one that's never executed. |
| 804 | assert(GetBFI && "GetBFI must be available" ); |
| 805 | BlockFrequencyInfo *BFI = &(GetBFI(F)); |
| 806 | assert(BFI && "BFI must be available" ); |
| 807 | auto ProfileCount = BFI->getBlockProfileCount(BB); |
| 808 | if (*ProfileCount == 0) |
| 809 | ColdSize += Cost - CostAtBBStart; |
| 810 | } |
| 811 | |
| 812 | auto *TI = BB->getTerminator(); |
| 813 |     // If we had any successors at this point, then post-inlining is likely to
| 814 | // have them as well. Note that we assume any basic blocks which existed |
| 815 | // due to branches or switches which folded above will also fold after |
| 816 | // inlining. |
| 817 | if (SingleBB && TI->getNumSuccessors() > 1) { |
| 818 | // Take off the bonus we applied to the threshold. |
| 819 | Threshold -= SingleBBBonus; |
| 820 | SingleBB = false; |
| 821 | } |
| 822 | } |
| 823 | |
| 824 | void onInstructionAnalysisStart(const Instruction *I) override { |
| 825 | // This function is called to store the initial cost of inlining before |
| 826 | // the given instruction was assessed. |
| 827 | if (!PrintInstructionComments) |
| 828 | return; |
| 829 | auto &CostDetail = InstructionCostDetailMap[I]; |
| 830 | CostDetail.CostBefore = Cost; |
| 831 | CostDetail.ThresholdBefore = Threshold; |
| 832 | } |
| 833 | |
| 834 | void onInstructionAnalysisFinish(const Instruction *I) override { |
| 835 | // This function is called to find new values of cost and threshold after |
| 836 | // the instruction has been assessed. |
| 837 | if (!PrintInstructionComments) |
| 838 | return; |
| 839 | auto &CostDetail = InstructionCostDetailMap[I]; |
| 840 | CostDetail.CostAfter = Cost; |
| 841 | CostDetail.ThresholdAfter = Threshold; |
| 842 | } |
| 843 | |
| 844 | bool isCostBenefitAnalysisEnabled() { |
| 845 | if (!PSI || !PSI->hasProfileSummary()) |
| 846 | return false; |
| 847 | |
| 848 | if (!GetBFI) |
| 849 | return false; |
| 850 | |
| 851 | if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) { |
| 852 | // Honor the explicit request from the user. |
| 853 | if (!InlineEnableCostBenefitAnalysis) |
| 854 | return false; |
| 855 | } else { |
| 856 | // Otherwise, require instrumentation profile. |
| 857 | if (!PSI->hasInstrumentationProfile()) |
| 858 | return false; |
| 859 | } |
| 860 | |
| 861 | auto *Caller = CandidateCall.getParent()->getParent(); |
| 862 | if (!Caller->getEntryCount()) |
| 863 | return false; |
| 864 | |
| 865 | BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); |
| 866 | if (!CallerBFI) |
| 867 | return false; |
| 868 | |
| 869 | // For now, limit to hot call site. |
| 870 | if (!PSI->isHotCallSite(CB: CandidateCall, BFI: CallerBFI)) |
| 871 | return false; |
| 872 | |
| 873 | // Make sure we have a nonzero entry count. |
| 874 | auto EntryCount = F.getEntryCount(); |
| 875 | if (!EntryCount || !EntryCount->getCount()) |
| 876 | return false; |
| 877 | |
| 878 | BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); |
| 879 | if (!CalleeBFI) |
| 880 | return false; |
| 881 | |
| 882 | return true; |
| 883 | } |
| 884 | |
| 885 | // A helper function to choose between command line override and default. |
| 886 | unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { |
| 887 | if (InlineSavingsMultiplier.getNumOccurrences()) |
| 888 | return InlineSavingsMultiplier; |
| 889 | return TTI.getInliningCostBenefitAnalysisSavingsMultiplier(); |
| 890 | } |
| 891 | |
| 892 | // A helper function to choose between command line override and default. |
| 893 | unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const { |
| 894 | if (InlineSavingsProfitableMultiplier.getNumOccurrences()) |
| 895 | return InlineSavingsProfitableMultiplier; |
| 896 | return TTI.getInliningCostBenefitAnalysisProfitableMultiplier(); |
| 897 | } |
| 898 | |
| 899 | void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) { |
| 900 | if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt( |
| 901 | CB&: CandidateCall, AttrKind: "inline-cycle-savings-for-test" )) { |
| 902 | CycleSavings = *AttrCycleSavings; |
| 903 | } |
| 904 | |
| 905 | if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt( |
| 906 | CB&: CandidateCall, AttrKind: "inline-runtime-cost-for-test" )) { |
| 907 | Size = *AttrRuntimeCost; |
| 908 | } |
| 909 | } |
| 910 | |
| 911 | // Determine whether we should inline the given call site, taking into account |
| 912 | // both the size cost and the cycle savings. Return std::nullopt if we don't |
| 913 | // have sufficient profiling information to determine. |
| 914 | std::optional<bool> costBenefitAnalysis() { |
| 915 | if (!CostBenefitAnalysisEnabled) |
| 916 | return std::nullopt; |
| 917 | |
| 918 | // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 |
| 919 | // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by |
| 920 | // falling back to the cost-based metric. |
| 921 | // TODO: Improve this hacky condition. |
| 922 | if (Threshold == 0) |
| 923 | return std::nullopt; |
| 924 | |
| 925 | assert(GetBFI); |
| 926 | BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); |
| 927 | assert(CalleeBFI); |
| 928 | |
| 929 | // The cycle savings expressed as the sum of InstrCost |
| 930 | // multiplied by the estimated dynamic count of each instruction we can |
| 931 | // avoid. Savings come from the call site cost, such as argument setup and |
| 932 | // the call instruction, as well as the instructions that are folded. |
| 933 | // |
| 934 |     // We use 128-bit APInt here to avoid potential overflow. This variable
| 935 |     // should stay well below 10^24 (or 2^80) in practice. This "worst" case
| 936 |     // assumes that we can avoid or fold a billion instructions, each with a
| 937 |     // profile count of 10^15 -- roughly the number of cycles for a 24-hour
| 938 |     // period on a 4GHz machine.
| 939 | APInt CycleSavings(128, 0); |
| 940 | |
| 941 | for (auto &BB : F) { |
| 942 | APInt CurrentSavings(128, 0); |
| 943 | for (auto &I : BB) { |
| 944 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: &I)) { |
| 945 | // Count a conditional branch as savings if it becomes unconditional. |
| 946 | if (BI->isConditional() && |
| 947 | getSimplifiedValue<ConstantInt>(V: BI->getCondition())) { |
| 948 | CurrentSavings += InstrCost; |
| 949 | } |
| 950 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: &I)) { |
| 951 | if (getSimplifiedValue<ConstantInt>(V: SI->getCondition())) |
| 952 | CurrentSavings += InstrCost; |
| 953 | } else if (Value *V = dyn_cast<Value>(Val: &I)) { |
| 954 | // Count an instruction as savings if we can fold it. |
| 955 | if (SimplifiedValues.count(Val: V)) { |
| 956 | CurrentSavings += InstrCost; |
| 957 | } |
| 958 | } |
| 959 | } |
| 960 | |
| 961 | auto ProfileCount = CalleeBFI->getBlockProfileCount(BB: &BB); |
| 962 | CurrentSavings *= *ProfileCount; |
| 963 | CycleSavings += CurrentSavings; |
| 964 | } |
| 965 | |
| 966 | // Compute the cycle savings per call. |
| 967 | auto EntryProfileCount = F.getEntryCount(); |
| 968 | assert(EntryProfileCount && EntryProfileCount->getCount()); |
| 969 | auto EntryCount = EntryProfileCount->getCount(); |
| 970 | CycleSavings += EntryCount / 2; |
| 971 | CycleSavings = CycleSavings.udiv(RHS: EntryCount); |
| 972 | |
| 973 | // Compute the total savings for the call site. |
| 974 | auto *CallerBB = CandidateCall.getParent(); |
| 975 | BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); |
| 976 | CycleSavings += getCallsiteCost(TTI, Call: this->CandidateCall, DL); |
| 977 | CycleSavings *= *CallerBFI->getBlockProfileCount(BB: CallerBB); |
| 978 | |
| 979 | // Remove the cost of the cold basic blocks to model the runtime cost more |
| 980 | // accurately. Both machine block placement and function splitting could |
| 981 | // place cold blocks further from hot blocks. |
| 982 | int Size = Cost - ColdSize; |
| 983 | |
| 984 | // Allow tiny callees to be inlined regardless of whether they meet the |
| 985 | // savings threshold. |
| 986 | Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; |
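|  |     // For instance, with the default allowance of 100, a callee of size 120 is
|  |     // treated as size 20 here, and a callee of size 80 is treated as size 1.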
| 987 | |
| 988 | OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size); |
| 989 | CostBenefit.emplace(args: APInt(128, Size), args&: CycleSavings); |
| 990 | |
| 991 | // Let R be the ratio of CycleSavings to Size. We accept the inlining |
| 992 | // opportunity if R is really high and reject if R is really low. If R is |
| 993 | // somewhere in the middle, we fall back to the cost-based analysis. |
| 994 | // |
| 995 | // Specifically, let R = CycleSavings / Size, we accept the inlining |
| 996 | // opportunity if: |
| 997 | // |
| 998 | // PSI->getOrCompHotCountThreshold() |
| 999 | // R > ------------------------------------------------- |
| 1000 | // getInliningCostBenefitAnalysisSavingsMultiplier() |
| 1001 | // |
| 1002 | // and reject the inlining opportunity if: |
| 1003 | // |
| 1004 | // PSI->getOrCompHotCountThreshold() |
| 1005 | // R <= ---------------------------------------------------- |
| 1006 | // getInliningCostBenefitAnalysisProfitableMultiplier() |
| 1007 | // |
| 1008 | // Otherwise, we fall back to the cost-based analysis. |
| 1009 | // |
| 1010 | // Implementation-wise, use multiplication (CycleSavings * Multiplier, |
| 1011 | // HotCountThreshold * Size) rather than division to avoid precision loss. |
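|  |     //
|  |     // As an illustration (hypothetical numbers): with Size = 100, CycleSavings =
|  |     // 2,000,000, a savings multiplier of 8 and a hot count threshold of 100,000,
|  |     // we compare 2,000,000 * 8 = 16,000,000 against 100,000 * 100 = 10,000,000;
|  |     // the savings side is larger, so the call site is accepted without falling
|  |     // back to the cost-based analysis.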
| 1012 | APInt Threshold(128, PSI->getOrCompHotCountThreshold()); |
| 1013 | Threshold *= Size; |
| 1014 | |
| 1015 | APInt UpperBoundCycleSavings = CycleSavings; |
| 1016 | UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier(); |
| 1017 | if (UpperBoundCycleSavings.uge(RHS: Threshold)) |
| 1018 | return true; |
| 1019 | |
| 1020 | APInt LowerBoundCycleSavings = CycleSavings; |
| 1021 | LowerBoundCycleSavings *= |
| 1022 | getInliningCostBenefitAnalysisProfitableMultiplier(); |
| 1023 | if (LowerBoundCycleSavings.ult(RHS: Threshold)) |
| 1024 | return false; |
| 1025 | |
| 1026 | // Otherwise, fall back to the cost-based analysis. |
| 1027 | return std::nullopt; |
| 1028 | } |
| 1029 | |
| 1030 | InlineResult finalizeAnalysis() override { |
| 1031 | // Loops generally act a lot like calls in that they act like barriers to |
| 1032 | // movement, require a certain amount of setup, etc. So when optimising for |
| 1033 | // size, we penalise any call sites that perform loops. We do this after all |
| 1034 | // other costs here, so will likely only be dealing with relatively small |
| 1035 | // functions (and hence DT and LI will hopefully be cheap). |
| 1036 | auto *Caller = CandidateCall.getFunction(); |
| 1037 | if (Caller->hasMinSize()) { |
| 1038 | DominatorTree DT(F); |
| 1039 | LoopInfo LI(DT); |
| 1040 | int NumLoops = 0; |
| 1041 | for (Loop *L : LI) { |
| 1042 | // Ignore loops that will not be executed |
| 1043 | if (DeadBlocks.count(Ptr: L->getHeader())) |
| 1044 | continue; |
| 1045 | NumLoops++; |
| 1046 | } |
| 1047 | addCost(Inc: NumLoops * InlineConstants::LoopPenalty); |
| 1048 | } |
| 1049 | |
| 1050 | // We applied the maximum possible vector bonus at the beginning. Now, |
| 1051 | // subtract the excess bonus, if any, from the Threshold before |
| 1052 | // comparing against Cost. |
| 1053 | if (NumVectorInstructions <= NumInstructions / 10) |
| 1054 | Threshold -= VectorBonus; |
| 1055 | else if (NumVectorInstructions <= NumInstructions / 2) |
| 1056 | Threshold -= VectorBonus / 2; |
| 1057 | |
| 1058 | if (std::optional<int> AttrCost = |
| 1059 | getStringFnAttrAsInt(CB&: CandidateCall, AttrKind: "function-inline-cost" )) |
| 1060 | Cost = *AttrCost; |
| 1061 | |
| 1062 | if (std::optional<int> AttrCostMult = getStringFnAttrAsInt( |
| 1063 | CB&: CandidateCall, |
| 1064 | AttrKind: InlineConstants::FunctionInlineCostMultiplierAttributeName)) |
| 1065 | Cost *= *AttrCostMult; |
| 1066 | |
| 1067 | if (std::optional<int> AttrThreshold = |
| 1068 | getStringFnAttrAsInt(CB&: CandidateCall, AttrKind: "function-inline-threshold" )) |
| 1069 | Threshold = *AttrThreshold; |
| 1070 | |
| 1071 | if (auto Result = costBenefitAnalysis()) { |
| 1072 | DecidedByCostBenefit = true; |
| 1073 | if (*Result) |
| 1074 | return InlineResult::success(); |
| 1075 | else |
| 1076 | return InlineResult::failure(Reason: "Cost over threshold." ); |
| 1077 | } |
| 1078 | |
| 1079 | if (IgnoreThreshold) |
| 1080 | return InlineResult::success(); |
| 1081 | |
| 1082 | DecidedByCostThreshold = true; |
| 1083 | return Cost < std::max(a: 1, b: Threshold) |
| 1084 | ? InlineResult::success() |
| 1085 | : InlineResult::failure(Reason: "Cost over threshold." ); |
| 1086 | } |
| 1087 | |
| 1088 | bool shouldStop() override { |
| 1089 | if (IgnoreThreshold || ComputeFullInlineCost) |
| 1090 | return false; |
| 1091 | // Bail out the moment we cross the threshold. This means we'll under-count |
| 1092 | // the cost, but only when undercounting doesn't matter. |
| 1093 | if (Cost < Threshold) |
| 1094 | return false; |
| 1095 | DecidedByCostThreshold = true; |
| 1096 | return true; |
| 1097 | } |
| 1098 | |
| 1099 | void onLoadEliminationOpportunity() override { |
| 1100 | LoadEliminationCost += InstrCost; |
| 1101 | } |
| 1102 | |
| 1103 | InlineResult onAnalysisStart() override { |
| 1104 | // Perform some tweaks to the cost and threshold based on the direct |
| 1105 | // callsite information. |
| 1106 | |
| 1107 | // We want to more aggressively inline vector-dense kernels, so up the |
| 1108 | // threshold, and we'll lower it if the % of vector instructions gets too |
| 1109 |     // low. Note that these bonuses are somewhat arbitrary and evolved over
| 1110 | // time by accident as much as because they are principled bonuses. |
| 1111 | // |
| 1112 | // FIXME: It would be nice to remove all such bonuses. At least it would be |
| 1113 | // nice to base the bonus values on something more scientific. |
| 1114 | assert(NumInstructions == 0); |
| 1115 | assert(NumVectorInstructions == 0); |
| 1116 | |
| 1117 | // Update the threshold based on callsite properties |
| 1118 | updateThreshold(Call&: CandidateCall, Callee&: F); |
| 1119 | |
| 1120 | // While Threshold depends on commandline options that can take negative |
| 1121 | // values, we want to enforce the invariant that the computed threshold and |
| 1122 | // bonuses are non-negative. |
| 1123 | assert(Threshold >= 0); |
| 1124 | assert(SingleBBBonus >= 0); |
| 1125 | assert(VectorBonus >= 0); |
| 1126 | |
| 1127 | // Speculatively apply all possible bonuses to Threshold. If cost exceeds |
| 1128 | // this Threshold any time, and cost cannot decrease, we can stop processing |
| 1129 | // the rest of the function body. |
| 1130 | Threshold += (SingleBBBonus + VectorBonus); |
| 1131 | |
| 1132 | // Give out bonuses for the callsite, as the instructions setting them up |
| 1133 | // will be gone after inlining. |
| 1134 | addCost(Inc: -getCallsiteCost(TTI, Call: this->CandidateCall, DL)); |
| 1135 | |
| 1136 | // If this function uses the coldcc calling convention, prefer not to inline |
| 1137 | // it. |
| 1138 | if (F.getCallingConv() == CallingConv::Cold) |
| 1139 | Cost += InlineConstants::ColdccPenalty; |
| 1140 | |
| 1141 | LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n" ); |
| 1142 | |
| 1143 | // Check if we're done. This can happen due to bonuses and penalties. |
| 1144 | if (Cost >= Threshold && !ComputeFullInlineCost) |
| 1145 | return InlineResult::failure(Reason: "high cost" ); |
| 1146 | |
| 1147 | return InlineResult::success(); |
| 1148 | } |
| 1149 | |
| 1150 | public: |
| 1151 | InlineCostCallAnalyzer( |
| 1152 | Function &Callee, CallBase &Call, const InlineParams &Params, |
| 1153 | const TargetTransformInfo &TTI, |
| 1154 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 1155 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, |
| 1156 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, |
| 1157 | ProfileSummaryInfo *PSI = nullptr, |
| 1158 | OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, |
| 1159 | bool IgnoreThreshold = false, |
| 1160 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = |
| 1161 | nullptr) |
| 1162 | : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI, |
| 1163 | ORE, GetEphValuesCache), |
| 1164 | ComputeFullInlineCost(OptComputeFullInlineCost || |
| 1165 | Params.ComputeFullInlineCost || ORE || |
| 1166 | isCostBenefitAnalysisEnabled()), |
| 1167 | Params(Params), Threshold(Params.DefaultThreshold), |
| 1168 | BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), |
| 1169 | CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), |
| 1170 | Writer(this) { |
| 1171 | AllowRecursiveCall = *Params.AllowRecursiveCall; |
| 1172 | } |
| 1173 | |
| 1174 | /// Annotation Writer for instruction details |
| 1175 | InlineCostAnnotationWriter Writer; |
| 1176 | |
| 1177 | void dump(); |
| 1178 | |
| 1179 | // Prints the same analysis as dump(), but its definition is not dependent |
| 1180 | // on the build. |
| 1181 | void print(raw_ostream &OS); |
| 1182 | |
| 1183 | std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) { |
| 1184 | auto It = InstructionCostDetailMap.find(Val: I); |
| 1185 | if (It != InstructionCostDetailMap.end()) |
| 1186 | return It->second; |
| 1187 | return std::nullopt; |
| 1188 | } |
| 1189 | |
| 1190 | virtual ~InlineCostCallAnalyzer() = default; |
| 1191 | int getThreshold() const { return Threshold; } |
| 1192 | int getCost() const { return Cost; } |
| 1193 | int getStaticBonusApplied() const { return StaticBonusApplied; } |
| 1194 | std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; } |
| 1195 | bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } |
| 1196 | bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; } |
| 1197 | }; |
| 1198 | |
| 1199 | // Return true if CB is the sole call to local function Callee. |
| 1200 | static bool isSoleCallToLocalFunction(const CallBase &CB, |
| 1201 | const Function &Callee) { |
| 1202 | return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() && |
| 1203 | &Callee == CB.getCalledFunction(); |
| 1204 | } |
| 1205 | |
| 1206 | class InlineCostFeaturesAnalyzer final : public CallAnalyzer { |
| 1207 | private: |
| 1208 | InlineCostFeatures Cost = {}; |
| 1209 | |
| 1210 | // FIXME: These constants are taken from the heuristic-based cost visitor. |
| 1211 | // These should be removed entirely in a later revision to avoid reliance on |
| 1212 | // heuristics in the ML inliner. |
| 1213 | static constexpr int JTCostMultiplier = 2; |
| 1214 | static constexpr int CaseClusterCostMultiplier = 2; |
| 1215 | static constexpr int SwitchDefaultDestCostMultiplier = 2; |
| 1216 | static constexpr int SwitchCostMultiplier = 2; |
| 1217 | |
| 1218 | // FIXME: These are taken from the heuristic-based cost visitor: we should |
| 1219 | // eventually abstract these to the CallAnalyzer to avoid duplication. |
| 1220 | unsigned SROACostSavingOpportunities = 0; |
| 1221 | int VectorBonus = 0; |
| 1222 | int SingleBBBonus = 0; |
| 1223 | int Threshold = 5; |
| 1224 | |
| 1225 | DenseMap<AllocaInst *, unsigned> SROACosts; |
| 1226 | |
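|      | // Helpers to bump or overwrite a single entry of the feature vector.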
| 1227 | void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) { |
| 1228 | Cost[static_cast<size_t>(Feature)] += Delta; |
| 1229 | } |
| 1230 | |
| 1231 | void set(InlineCostFeatureIndex Feature, int64_t Value) { |
| 1232 | Cost[static_cast<size_t>(Feature)] = Value; |
| 1233 | } |
| 1234 | |
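|      | // When SROA is disabled for an alloca, move its accumulated savings into
|      | // the sroa_losses feature and drop them from the running savings total.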
| 1235 | void onDisableSROA(AllocaInst *Arg) override { |
| 1236 | auto CostIt = SROACosts.find(Val: Arg); |
| 1237 | if (CostIt == SROACosts.end()) |
| 1238 | return; |
| 1239 | |
| 1240 | increment(Feature: InlineCostFeatureIndex::sroa_losses, Delta: CostIt->second); |
| 1241 | SROACostSavingOpportunities -= CostIt->second; |
| 1242 | SROACosts.erase(I: CostIt); |
| 1243 | } |
| 1244 | |
| 1245 | void onDisableLoadElimination() override { |
| 1246 | set(Feature: InlineCostFeatureIndex::load_elimination, Value: 1); |
| 1247 | } |
| 1248 | |
| 1249 | void onCallPenalty() override { |
| 1250 | increment(Feature: InlineCostFeatureIndex::call_penalty, Delta: CallPenalty); |
| 1251 | } |
| 1252 | |
| 1253 | void onCallArgumentSetup(const CallBase &Call) override { |
| 1254 | increment(Feature: InlineCostFeatureIndex::call_argument_setup, |
| 1255 | Delta: Call.arg_size() * InstrCost); |
| 1256 | } |
| 1257 | |
| 1258 | void onLoadRelativeIntrinsic() override { |
| 1259 | increment(Feature: InlineCostFeatureIndex::load_relative_intrinsic, Delta: 3 * InstrCost); |
| 1260 | } |
| 1261 | |
| 1262 | void onLoweredCall(Function *F, CallBase &Call, |
| 1263 | bool IsIndirectCall) override { |
| 1264 | increment(Feature: InlineCostFeatureIndex::lowered_call_arg_setup, |
| 1265 | Delta: Call.arg_size() * InstrCost); |
| 1266 | |
| 1267 | if (IsIndirectCall) { |
| 1268 | InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0, |
| 1269 | /*HintThreshold*/ {}, |
| 1270 | /*ColdThreshold*/ {}, |
| 1271 | /*OptSizeThreshold*/ {}, |
| 1272 | /*OptMinSizeThreshold*/ {}, |
| 1273 | /*HotCallSiteThreshold*/ {}, |
| 1274 | /*LocallyHotCallSiteThreshold*/ {}, |
| 1275 | /*ColdCallSiteThreshold*/ {}, |
| 1276 | /*ComputeFullInlineCost*/ true, |
| 1277 | /*EnableDeferral*/ true}; |
| 1278 | IndirectCallParams.DefaultThreshold = |
| 1279 | InlineConstants::IndirectCallThreshold; |
| 1280 | |
| 1281 | InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, |
| 1282 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 1283 | /*BoostIndirect=*/false, /*IgnoreThreshold=*/true);
| 1284 | if (CA.analyze().isSuccess()) { |
| 1285 | increment(Feature: InlineCostFeatureIndex::nested_inline_cost_estimate, |
| 1286 | Delta: CA.getCost()); |
| 1287 | increment(Feature: InlineCostFeatureIndex::nested_inlines, Delta: 1); |
| 1288 | } |
| 1289 | } else { |
| 1290 | onCallPenalty(); |
| 1291 | } |
| 1292 | } |
| 1293 | |
| 1294 | void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 1295 | bool DefaultDestUnreachable) override { |
| 1296 | if (JumpTableSize) { |
| 1297 | if (!DefaultDestUnreachable) |
| 1298 | increment(Feature: InlineCostFeatureIndex::switch_default_dest_penalty, |
| 1299 | Delta: SwitchDefaultDestCostMultiplier * InstrCost); |
| 1300 | int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost + |
| 1301 | JTCostMultiplier * InstrCost; |
| 1302 | increment(Feature: InlineCostFeatureIndex::jump_table_penalty, Delta: JTCost); |
| 1303 | return; |
| 1304 | } |
| 1305 | |
| 1306 | if (NumCaseCluster <= 3) { |
| 1307 | increment(Feature: InlineCostFeatureIndex::case_cluster_penalty, |
| 1308 | Delta: (NumCaseCluster - DefaultDestUnreachable) * |
| 1309 | CaseClusterCostMultiplier * InstrCost); |
| 1310 | return; |
| 1311 | } |
| 1312 | |
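|      | // Otherwise charge for the expected number of comparisons needed to lower
|      | // the switch.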
| 1313 | int64_t ExpectedNumberOfCompare = |
| 1314 | getExpectedNumberOfCompare(NumCaseCluster); |
| 1315 | |
| 1316 | int64_t SwitchCost = |
| 1317 | ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost; |
| 1318 | increment(Feature: InlineCostFeatureIndex::switch_penalty, Delta: SwitchCost); |
| 1319 | } |
| 1320 | |
| 1321 | void onMissedSimplification() override { |
| 1322 | increment(Feature: InlineCostFeatureIndex::unsimplified_common_instructions, |
| 1323 | Delta: InstrCost); |
| 1324 | } |
| 1325 | |
| 1326 | void onInitializeSROAArg(AllocaInst *Arg) override { |
| 1327 | auto SROAArgCost = TTI.getCallerAllocaCost(CB: &CandidateCall, AI: Arg); |
| 1328 | SROACosts[Arg] = SROAArgCost; |
| 1329 | SROACostSavingOpportunities += SROAArgCost; |
| 1330 | } |
| 1331 | |
| 1332 | void onAggregateSROAUse(AllocaInst *Arg) override { |
| 1333 | SROACosts.find(Val: Arg)->second += InstrCost; |
| 1334 | SROACostSavingOpportunities += InstrCost; |
| 1335 | } |
| 1336 | |
| 1337 | void onBlockAnalyzed(const BasicBlock *BB) override { |
| 1338 | if (BB->getTerminator()->getNumSuccessors() > 1) |
| 1339 | set(Feature: InlineCostFeatureIndex::is_multiple_blocks, Value: 1); |
| 1340 | Threshold -= SingleBBBonus; |
| 1341 | } |
| 1342 | |
| 1343 | InlineResult finalizeAnalysis() override { |
| 1344 | auto *Caller = CandidateCall.getFunction(); |
| 1345 | if (Caller->hasMinSize()) { |
| 1346 | DominatorTree DT(F); |
| 1347 | LoopInfo LI(DT); |
| 1348 | for (Loop *L : LI) { |
| 1349 | // Ignore loops that will not be executed |
| 1350 | if (DeadBlocks.count(Ptr: L->getHeader())) |
| 1351 | continue; |
| 1352 | increment(Feature: InlineCostFeatureIndex::num_loops, |
| 1353 | Delta: InlineConstants::LoopPenalty); |
| 1354 | } |
| 1355 | } |
| 1356 | set(Feature: InlineCostFeatureIndex::dead_blocks, Value: DeadBlocks.size()); |
| 1357 | set(Feature: InlineCostFeatureIndex::simplified_instructions, |
| 1358 | Value: NumInstructionsSimplified); |
| 1359 | set(Feature: InlineCostFeatureIndex::constant_args, Value: NumConstantArgs); |
| 1360 | set(Feature: InlineCostFeatureIndex::constant_offset_ptr_args, |
| 1361 | Value: NumConstantOffsetPtrArgs); |
| 1362 | set(Feature: InlineCostFeatureIndex::sroa_savings, Value: SROACostSavingOpportunities); |
| 1363 | |
| 1364 | if (NumVectorInstructions <= NumInstructions / 10) |
| 1365 | Threshold -= VectorBonus; |
| 1366 | else if (NumVectorInstructions <= NumInstructions / 2) |
| 1367 | Threshold -= VectorBonus / 2; |
| 1368 | |
| 1369 | set(Feature: InlineCostFeatureIndex::threshold, Value: Threshold); |
| 1370 | |
| 1371 | return InlineResult::success(); |
| 1372 | } |
| 1373 | |
| 1374 | bool shouldStop() override { return false; } |
| 1375 | |
| 1376 | void onLoadEliminationOpportunity() override { |
| 1377 | increment(Feature: InlineCostFeatureIndex::load_elimination, Delta: 1); |
| 1378 | } |
| 1379 | |
| 1380 | InlineResult onAnalysisStart() override { |
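|      | // Seed callsite_cost with the negated cost of the call site itself, since
|      | // inlining removes that overhead.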
| 1381 | increment(Feature: InlineCostFeatureIndex::callsite_cost, |
| 1382 | Delta: -1 * getCallsiteCost(TTI, Call: this->CandidateCall, DL)); |
| 1383 | |
| 1384 | set(Feature: InlineCostFeatureIndex::cold_cc_penalty, |
| 1385 | Value: (F.getCallingConv() == CallingConv::Cold)); |
| 1386 | |
| 1387 | set(Feature: InlineCostFeatureIndex::last_call_to_static_bonus, |
| 1388 | Value: isSoleCallToLocalFunction(CB: CandidateCall, Callee: F)); |
| 1389 | |
| 1390 | // FIXME: we shouldn't repeat this logic in both the Features and Cost |
| 1391 | // analyzer - instead, we should abstract it to a common method in the |
| 1392 | // CallAnalyzer |
| 1393 | int SingleBBBonusPercent = 50; |
| 1394 | int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); |
| 1395 | Threshold += TTI.adjustInliningThreshold(CB: &CandidateCall); |
| 1396 | Threshold *= TTI.getInliningThresholdMultiplier(); |
| 1397 | SingleBBBonus = Threshold * SingleBBBonusPercent / 100; |
| 1398 | VectorBonus = Threshold * VectorBonusPercent / 100; |
| 1399 | Threshold += (SingleBBBonus + VectorBonus); |
| 1400 | |
| 1401 | return InlineResult::success(); |
| 1402 | } |
| 1403 | |
| 1404 | public: |
| 1405 | InlineCostFeaturesAnalyzer( |
| 1406 | const TargetTransformInfo &TTI, |
| 1407 | function_ref<AssumptionCache &(Function &)> &GetAssumptionCache, |
| 1408 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 1409 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 1410 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, |
| 1411 | CallBase &Call) |
| 1412 | : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, |
| 1413 | PSI) {} |
| 1414 | |
| 1415 | const InlineCostFeatures &features() const { return Cost; } |
| 1416 | }; |
| 1417 | |
| 1418 | } // namespace |
| 1419 | |
| 1420 | /// Test whether the given value is an Alloca-derived function argument. |
| 1421 | bool CallAnalyzer::isAllocaDerivedArg(Value *V) { |
| 1422 | return SROAArgValues.count(Val: V); |
| 1423 | } |
| 1424 | |
| 1425 | void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { |
| 1426 | onDisableSROA(Arg: SROAArg); |
| 1427 | EnabledSROAAllocas.erase(V: SROAArg); |
| 1428 | disableLoadElimination(); |
| 1429 | } |
| 1430 | |
| 1431 | void InlineCostAnnotationWriter::emitInstructionAnnot( |
| 1432 | const Instruction *I, formatted_raw_ostream &OS) { |
| 1433 | // The cost of inlining the given instruction is always printed.
| 1434 | // The threshold delta is printed only when it is non-zero, which happens
| 1435 | // when we decide to give a bonus at a particular instruction.
| 1436 | std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I); |
| 1437 | if (!Record) |
| 1438 | OS << "; No analysis for the instruction" ; |
| 1439 | else { |
| 1440 | OS << "; cost before = " << Record->CostBefore |
| 1441 | << ", cost after = " << Record->CostAfter |
| 1442 | << ", threshold before = " << Record->ThresholdBefore |
| 1443 | << ", threshold after = " << Record->ThresholdAfter << ", " ; |
| 1444 | OS << "cost delta = " << Record->getCostDelta(); |
| 1445 | if (Record->hasThresholdChanged()) |
| 1446 | OS << ", threshold delta = " << Record->getThresholdDelta(); |
| 1447 | } |
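|      | // If the analysis simplified this instruction, also print the value it
|      | // simplified to.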
| 1448 | auto *V = ICCA->getSimplifiedValueUnchecked(V: const_cast<Instruction *>(I)); |
| 1449 | if (V) { |
| 1450 | OS << ", simplified to " ; |
| 1451 | V->print(O&: OS, IsForDebug: true); |
| 1452 | if (auto *VI = dyn_cast<Instruction>(Val: V)) { |
| 1453 | if (VI->getFunction() != I->getFunction()) |
| 1454 | OS << " (caller instruction)" ; |
| 1455 | } else if (auto *VArg = dyn_cast<Argument>(Val: V)) { |
| 1456 | if (VArg->getParent() != I->getFunction()) |
| 1457 | OS << " (caller argument)" ; |
| 1458 | } |
| 1459 | } |
| 1460 | OS << "\n" ; |
| 1461 | } |
| 1462 | |
| 1463 | /// If 'V' maps to a SROA candidate, disable SROA for it. |
| 1464 | void CallAnalyzer::disableSROA(Value *V) { |
| 1465 | if (auto *SROAArg = getSROAArgForValueOrNull(V)) { |
| 1466 | disableSROAForArg(SROAArg); |
| 1467 | } |
| 1468 | } |
| 1469 | |
| 1470 | void CallAnalyzer::disableLoadElimination() { |
| 1471 | if (EnableLoadElimination) { |
| 1472 | onDisableLoadElimination(); |
| 1473 | EnableLoadElimination = false; |
| 1474 | } |
| 1475 | } |
| 1476 | |
| 1477 | /// Accumulate a constant GEP offset into an APInt if possible. |
| 1478 | /// |
| 1479 | /// Returns false if unable to compute the offset for any reason. Respects any |
| 1480 | /// simplified values known during the analysis of this callsite. |
| 1481 | bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { |
| 1482 | unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(Ty: GEP.getType()); |
| 1483 | assert(IntPtrWidth == Offset.getBitWidth()); |
| 1484 | |
| 1485 | for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); |
| 1486 | GTI != GTE; ++GTI) { |
| 1487 | ConstantInt *OpC = |
| 1488 | getDirectOrSimplifiedValue<ConstantInt>(V: GTI.getOperand()); |
| 1489 | if (!OpC) |
| 1490 | return false; |
| 1491 | if (OpC->isZero()) |
| 1492 | continue; |
| 1493 | |
| 1494 | // Handle a struct index, which adds its field offset to the pointer. |
| 1495 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
| 1496 | unsigned ElementIdx = OpC->getZExtValue(); |
| 1497 | const StructLayout *SL = DL.getStructLayout(Ty: STy); |
| 1498 | Offset += APInt(IntPtrWidth, SL->getElementOffset(Idx: ElementIdx)); |
| 1499 | continue; |
| 1500 | } |
| 1501 | |
| 1502 | APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL)); |
| 1503 | Offset += OpC->getValue().sextOrTrunc(width: IntPtrWidth) * TypeSize; |
| 1504 | } |
| 1505 | return true; |
| 1506 | } |
| 1507 | |
| 1508 | /// Use TTI to check whether a GEP is free. |
| 1509 | /// |
| 1510 | /// Respects any simplified values known during the analysis of this callsite. |
| 1511 | bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { |
| 1512 | SmallVector<Value *, 4> Operands; |
| 1513 | Operands.push_back(Elt: GEP.getOperand(i_nocapture: 0)); |
| 1514 | for (const Use &Op : GEP.indices()) |
| 1515 | if (Constant *SimpleOp = getSimplifiedValue<Constant>(V: Op)) |
| 1516 | Operands.push_back(Elt: SimpleOp); |
| 1517 | else |
| 1518 | Operands.push_back(Elt: Op); |
| 1519 | return TTI.getInstructionCost(U: &GEP, Operands, |
| 1520 | CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1521 | TargetTransformInfo::TCC_Free; |
| 1522 | } |
| 1523 | |
| 1524 | bool CallAnalyzer::visitAlloca(AllocaInst &I) { |
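|      | // Using an SROA candidate as the alloca's array size blocks SROA for it.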
| 1525 | disableSROA(V: I.getOperand(i_nocapture: 0)); |
| 1526 | |
| 1527 | // Check whether inlining will turn a dynamic alloca into a static |
| 1528 | // alloca and handle that case. |
| 1529 | if (I.isArrayAllocation()) { |
| 1530 | Constant *Size = getSimplifiedValue<Constant>(V: I.getArraySize()); |
| 1531 | if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Val: Size)) { |
| 1532 | // Sometimes a dynamic alloca could be converted into a static alloca |
| 1533 | // after this constant prop, and become a huge static alloca on an |
| 1534 | // unconditional CFG path. Avoid inlining if this is going to happen above |
| 1535 | // a threshold. |
| 1536 | // FIXME: If the threshold is removed or lowered too much, we could end up |
| 1537 | // being too pessimistic and prevent inlining non-problematic code. This |
| 1538 | // could result in unintended perf regressions. A better overall strategy |
| 1539 | // is needed to track stack usage during inlining. |
| 1540 | Type *Ty = I.getAllocatedType(); |
| 1541 | AllocatedSize = SaturatingMultiplyAdd( |
| 1542 | X: AllocSize->getLimitedValue(), |
| 1543 | Y: DL.getTypeAllocSize(Ty).getKnownMinValue(), A: AllocatedSize); |
| 1544 | if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) |
| 1545 | HasDynamicAlloca = true; |
| 1546 | return false; |
| 1547 | } |
| 1548 | } |
| 1549 | |
| 1550 | // Accumulate the allocated size. |
| 1551 | if (I.isStaticAlloca()) { |
| 1552 | Type *Ty = I.getAllocatedType(); |
| 1553 | AllocatedSize = SaturatingAdd(X: DL.getTypeAllocSize(Ty).getKnownMinValue(), |
| 1554 | Y: AllocatedSize); |
| 1555 | } |
| 1556 | |
| 1557 | // FIXME: This is overly conservative. Dynamic allocas are inefficient for |
| 1558 | // a variety of reasons, and so we would like to not inline them into |
| 1559 | // functions which don't currently have a dynamic alloca. This simply |
| 1560 | // disables inlining altogether in the presence of a dynamic alloca. |
| 1561 | if (!I.isStaticAlloca()) |
| 1562 | HasDynamicAlloca = true; |
| 1563 | |
| 1564 | return false; |
| 1565 | } |
| 1566 | |
| 1567 | bool CallAnalyzer::visitPHI(PHINode &I) { |
| 1568 | // FIXME: We need to propagate SROA *disabling* through phi nodes, even |
| 1569 | // though we don't want to propagate its bonuses. The idea is to disable
| 1570 | // SROA if it *might* be used in an inappropriate manner. |
| 1571 | |
| 1572 | // Phi nodes are always zero-cost. |
| 1573 | // FIXME: Pointer sizes may differ between different address spaces, so do we |
| 1574 | // need to use correct address space in the call to getPointerSizeInBits here? |
| 1575 | // Or could we skip the getPointerSizeInBits call completely? As far as I can |
| 1576 | // see the ZeroOffset is used as a dummy value, so we can probably use any |
| 1577 | // bit width for the ZeroOffset? |
| 1578 | APInt ZeroOffset = APInt::getZero(numBits: DL.getPointerSizeInBits(AS: 0)); |
| 1579 | bool CheckSROA = I.getType()->isPointerTy(); |
| 1580 | |
| 1581 | // Track the constant or pointer with constant offset we've seen so far. |
| 1582 | Constant *FirstC = nullptr; |
| 1583 | std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset}; |
| 1584 | Value *FirstV = nullptr; |
| 1585 | |
| 1586 | for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { |
| 1587 | BasicBlock *Pred = I.getIncomingBlock(i); |
| 1588 | // If the incoming block is dead, skip the incoming block. |
| 1589 | if (DeadBlocks.count(Ptr: Pred)) |
| 1590 | continue; |
| 1591 | // If the parent block of phi is not the known successor of the incoming |
| 1592 | // block, skip the incoming block. |
| 1593 | BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; |
| 1594 | if (KnownSuccessor && KnownSuccessor != I.getParent()) |
| 1595 | continue; |
| 1596 | |
| 1597 | Value *V = I.getIncomingValue(i); |
| 1598 | // If the incoming value is this phi itself, skip the incoming value. |
| 1599 | if (&I == V) |
| 1600 | continue; |
| 1601 | |
| 1602 | Constant *C = getDirectOrSimplifiedValue<Constant>(V); |
| 1603 | |
| 1604 | std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset}; |
| 1605 | if (!C && CheckSROA) |
| 1606 | BaseAndOffset = ConstantOffsetPtrs.lookup(Val: V); |
| 1607 | |
| 1608 | if (!C && !BaseAndOffset.first) |
| 1609 | // The incoming value is neither a constant nor a pointer with constant |
| 1610 | // offset, exit early. |
| 1611 | return true; |
| 1612 | |
| 1613 | if (FirstC) { |
| 1614 | if (FirstC == C) |
| 1615 | // If we've seen a constant incoming value before and it is the same |
| 1616 | // constant we see this time, continue checking the next incoming value. |
| 1617 | continue; |
| 1618 | // Otherwise early exit because we either see a different constant or saw |
| 1619 | // a constant before but we have a pointer with constant offset this time. |
| 1620 | return true; |
| 1621 | } |
| 1622 | |
| 1623 | if (FirstV) { |
| 1624 | // The same logic as above, but check pointer with constant offset here. |
| 1625 | if (FirstBaseAndOffset == BaseAndOffset) |
| 1626 | continue; |
| 1627 | return true; |
| 1628 | } |
| 1629 | |
| 1630 | if (C) { |
| 1631 | // This is the 1st time we've seen a constant, record it. |
| 1632 | FirstC = C; |
| 1633 | continue; |
| 1634 | } |
| 1635 | |
| 1636 | // The remaining case is that this is the 1st time we've seen a pointer with |
| 1637 | // constant offset, record it. |
| 1638 | FirstV = V; |
| 1639 | FirstBaseAndOffset = BaseAndOffset; |
| 1640 | } |
| 1641 | |
| 1642 | // Check if we can map phi to a constant. |
| 1643 | if (FirstC) { |
| 1644 | SimplifiedValues[&I] = FirstC; |
| 1645 | return true; |
| 1646 | } |
| 1647 | |
| 1648 | // Check if we can map phi to a pointer with constant offset. |
| 1649 | if (FirstBaseAndOffset.first) { |
| 1650 | ConstantOffsetPtrs[&I] = FirstBaseAndOffset; |
| 1651 | |
| 1652 | if (auto *SROAArg = getSROAArgForValueOrNull(V: FirstV)) |
| 1653 | SROAArgValues[&I] = SROAArg; |
| 1654 | } |
| 1655 | |
| 1656 | return true; |
| 1657 | } |
| 1658 | |
| 1659 | /// Check we can fold GEPs of constant-offset call site argument pointers. |
| 1660 | /// This requires target data and inbounds GEPs. |
| 1661 | /// |
| 1662 | /// \return true if the specified GEP can be folded. |
| 1663 | bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { |
| 1664 | // Check if we have a base + offset for the pointer. |
| 1665 | std::pair<Value *, APInt> BaseAndOffset = |
| 1666 | ConstantOffsetPtrs.lookup(Val: I.getPointerOperand()); |
| 1667 | if (!BaseAndOffset.first) |
| 1668 | return false; |
| 1669 | |
| 1670 | // Check if the offset of this GEP is constant, and if so accumulate it |
| 1671 | // into Offset. |
| 1672 | if (!accumulateGEPOffset(GEP&: cast<GEPOperator>(Val&: I), Offset&: BaseAndOffset.second)) |
| 1673 | return false; |
| 1674 | |
| 1675 | // Add the result as a new mapping to Base + Offset. |
| 1676 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1677 | |
| 1678 | return true; |
| 1679 | } |
| 1680 | |
| 1681 | bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { |
| 1682 | auto *SROAArg = getSROAArgForValueOrNull(V: I.getPointerOperand()); |
| 1683 | |
| 1684 | // Lambda to check whether a GEP's indices are all constant. |
| 1685 | auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { |
| 1686 | for (const Use &Op : GEP.indices()) |
| 1687 | if (!getDirectOrSimplifiedValue<Constant>(V: Op)) |
| 1688 | return false; |
| 1689 | return true; |
| 1690 | }; |
| 1691 | |
| 1692 | if (!DisableGEPConstOperand) |
| 1693 | if (simplifyInstruction(I)) |
| 1694 | return true; |
| 1695 | |
| 1696 | if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { |
| 1697 | if (SROAArg) |
| 1698 | SROAArgValues[&I] = SROAArg; |
| 1699 | |
| 1700 | // Constant GEPs are modeled as free. |
| 1701 | return true; |
| 1702 | } |
| 1703 | |
| 1704 | // Variable GEPs will require math and will disable SROA. |
| 1705 | if (SROAArg) |
| 1706 | disableSROAForArg(SROAArg); |
| 1707 | return isGEPFree(GEP&: I); |
| 1708 | } |
| 1709 | |
| 1710 | // Simplify \p Cmp if RHS is constant and LHS can be resolved with value tracking.
| 1711 | // This only handles the case where the Cmp guards a recursive call whose
| 1712 | // arguments make the Cmp known to fail/succeed for that recursive call.
| 1713 | bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { |
| 1714 | // Bail out if LHS is not a function argument or RHS is NOT const: |
| 1715 | if (!isa<Argument>(Val: Cmp.getOperand(i_nocapture: 0)) || !isa<Constant>(Val: Cmp.getOperand(i_nocapture: 1))) |
| 1716 | return false; |
| 1717 | auto *CmpOp = Cmp.getOperand(i_nocapture: 0); |
| 1718 | // Make sure that the callsite is recursive: |
| 1719 | if (CandidateCall.getCaller() != &F) |
| 1720 | return false; |
| 1721 | // Only handle the case when the callsite has a single predecessor: |
| 1722 | auto *CallBB = CandidateCall.getParent(); |
| 1723 | auto *Predecessor = CallBB->getSinglePredecessor(); |
| 1724 | if (!Predecessor) |
| 1725 | return false; |
| 1726 | // Check if the callsite is guarded by the same Cmp instruction: |
| 1727 | auto *Br = dyn_cast<BranchInst>(Val: Predecessor->getTerminator()); |
| 1728 | if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp) |
| 1729 | return false; |
| 1730 | |
| 1731 | // Check whether any argument of the recursive callsite affects the cmp
| 1732 | // instruction:
| 1733 | bool ArgFound = false; |
| 1734 | Value *FuncArg = nullptr, *CallArg = nullptr; |
| 1735 | for (unsigned ArgNum = 0; |
| 1736 | ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) { |
| 1737 | FuncArg = F.getArg(i: ArgNum); |
| 1738 | CallArg = CandidateCall.getArgOperand(i: ArgNum); |
| 1739 | if (FuncArg == CmpOp && CallArg != CmpOp) { |
| 1740 | ArgFound = true; |
| 1741 | break; |
| 1742 | } |
| 1743 | } |
| 1744 | if (!ArgFound) |
| 1745 | return false; |
| 1746 | |
| 1747 | // Now we have a recursive call that is guarded by a cmp instruction. |
| 1748 | // Check if this cmp can be simplified: |
| 1749 | SimplifyQuery SQ(DL, dyn_cast<Instruction>(Val: CallArg)); |
| 1750 | CondContext CC(&Cmp); |
| 1751 | CC.Invert = (CallBB != Br->getSuccessor(i: 0)); |
| 1752 | SQ.CC = &CC; |
| 1753 | CC.AffectedValues.insert(Ptr: FuncArg); |
| 1754 | Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( |
| 1755 | I: cast<CmpInst>(Val: &Cmp), NewOps: {CallArg, Cmp.getOperand(i_nocapture: 1)}, Q: SQ); |
| 1756 | if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(Val: SimplifiedInstruction)) { |
| 1757 | // Make sure that the BB of the recursive call is NOT the true successor |
| 1758 | // of the icmp. In other words, make sure that the recursion depth is 1. |
| 1759 | if ((ConstVal->isOne() && CC.Invert) || |
| 1760 | (ConstVal->isZero() && !CC.Invert)) { |
| 1761 | SimplifiedValues[&Cmp] = ConstVal; |
| 1762 | return true; |
| 1763 | } |
| 1764 | } |
| 1765 | return false; |
| 1766 | } |
| 1767 | |
| 1768 | /// Simplify \p I if its operands are constants and update SimplifiedValues. |
| 1769 | bool CallAnalyzer::simplifyInstruction(Instruction &I) { |
| 1770 | SmallVector<Constant *> COps; |
| 1771 | for (Value *Op : I.operands()) { |
| 1772 | Constant *COp = getDirectOrSimplifiedValue<Constant>(V: Op); |
| 1773 | if (!COp) |
| 1774 | return false; |
| 1775 | COps.push_back(Elt: COp); |
| 1776 | } |
| 1777 | auto *C = ConstantFoldInstOperands(I: &I, Ops: COps, DL); |
| 1778 | if (!C) |
| 1779 | return false; |
| 1780 | SimplifiedValues[&I] = C; |
| 1781 | return true; |
| 1782 | } |
| 1783 | |
| 1784 | /// Try to simplify a call to llvm.is.constant. |
| 1785 | /// |
| 1786 | /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since |
| 1787 | /// we expect calls of this specific intrinsic to be infrequent. |
| 1788 | /// |
| 1789 | /// FIXME: Given that we know CB's parent (F) caller |
| 1790 | /// (CandidateCall->getParent()->getParent()), we might be able to determine |
| 1791 | /// whether inlining F into F's caller would change how the call to |
| 1792 | /// llvm.is.constant would evaluate. |
| 1793 | bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) { |
| 1794 | Value *Arg = CB.getArgOperand(i: 0); |
| 1795 | auto *C = getDirectOrSimplifiedValue<Constant>(V: Arg); |
| 1796 | |
| 1797 | Type *RT = CB.getFunctionType()->getReturnType(); |
| 1798 | SimplifiedValues[&CB] = ConstantInt::get(Ty: RT, V: C ? 1 : 0); |
| 1799 | return true; |
| 1800 | } |
| 1801 | |
| 1802 | bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) { |
| 1803 | // As per the langref, "The fourth argument to llvm.objectsize determines if |
| 1804 | // the value should be evaluated at runtime." |
| 1805 | if (cast<ConstantInt>(Val: CB.getArgOperand(i: 3))->isOne()) |
| 1806 | return false; |
| 1807 | |
| 1808 | Value *V = lowerObjectSizeCall(ObjectSize: &cast<IntrinsicInst>(Val&: CB), DL, TLI: nullptr, |
| 1809 | /*MustSucceed=*/true); |
| 1810 | Constant *C = dyn_cast_or_null<Constant>(Val: V); |
| 1811 | if (C) |
| 1812 | SimplifiedValues[&CB] = C; |
| 1813 | return C; |
| 1814 | } |
| 1815 | |
| 1816 | bool CallAnalyzer::visitBitCast(BitCastInst &I) { |
| 1817 | // Propagate constants through bitcasts. |
| 1818 | if (simplifyInstruction(I)) |
| 1819 | return true; |
| 1820 | |
| 1821 | // Track base/offsets through casts |
| 1822 | std::pair<Value *, APInt> BaseAndOffset = |
| 1823 | ConstantOffsetPtrs.lookup(Val: I.getOperand(i_nocapture: 0)); |
| 1824 | // Casts don't change the offset, just wrap it up. |
| 1825 | if (BaseAndOffset.first) |
| 1826 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1827 | |
| 1828 | // Also look for SROA candidates here. |
| 1829 | if (auto *SROAArg = getSROAArgForValueOrNull(V: I.getOperand(i_nocapture: 0))) |
| 1830 | SROAArgValues[&I] = SROAArg; |
| 1831 | |
| 1832 | // Bitcasts are always zero cost. |
| 1833 | return true; |
| 1834 | } |
| 1835 | |
| 1836 | bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { |
| 1837 | // Propagate constants through ptrtoint. |
| 1838 | if (simplifyInstruction(I)) |
| 1839 | return true; |
| 1840 | |
| 1841 | // Track base/offset pairs when converted to a plain integer provided the |
| 1842 | // integer is large enough to represent the pointer. |
| 1843 | unsigned IntegerSize = I.getType()->getScalarSizeInBits(); |
| 1844 | unsigned AS = I.getOperand(i_nocapture: 0)->getType()->getPointerAddressSpace(); |
| 1845 | if (IntegerSize == DL.getPointerSizeInBits(AS)) { |
| 1846 | std::pair<Value *, APInt> BaseAndOffset = |
| 1847 | ConstantOffsetPtrs.lookup(Val: I.getOperand(i_nocapture: 0)); |
| 1848 | if (BaseAndOffset.first) |
| 1849 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1850 | } |
| 1851 | |
| 1852 | // This is really weird. Technically, ptrtoint will disable SROA. However, |
| 1853 | // unless that ptrtoint is *used* somewhere in the live basic blocks after |
| 1854 | // inlining, it will be nuked, and SROA should proceed. All of the uses which |
| 1855 | // would block SROA would also block SROA if applied directly to a pointer, |
| 1856 | // and so we can just add the integer in here. The only places where SROA is |
| 1857 | // preserved either cannot fire on an integer, or won't in-and-of themselves |
| 1858 | // disable SROA (ext) w/o some later use that we would see and disable. |
| 1859 | if (auto *SROAArg = getSROAArgForValueOrNull(V: I.getOperand(i_nocapture: 0))) |
| 1860 | SROAArgValues[&I] = SROAArg; |
| 1861 | |
| 1862 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1863 | TargetTransformInfo::TCC_Free; |
| 1864 | } |
| 1865 | |
| 1866 | bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { |
| 1867 | // Propagate constants through inttoptr.
| 1868 | if (simplifyInstruction(I)) |
| 1869 | return true; |
| 1870 | |
| 1871 | // Track base/offset pairs when round-tripped through a pointer without |
| 1872 | // modifications provided the integer is not too large. |
| 1873 | Value *Op = I.getOperand(i_nocapture: 0); |
| 1874 | unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); |
| 1875 | if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { |
| 1876 | std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Val: Op); |
| 1877 | if (BaseAndOffset.first) |
| 1878 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1879 | } |
| 1880 | |
| 1881 | // "Propagate" SROA here in the same manner as we do for ptrtoint above. |
| 1882 | if (auto *SROAArg = getSROAArgForValueOrNull(V: Op)) |
| 1883 | SROAArgValues[&I] = SROAArg; |
| 1884 | |
| 1885 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1886 | TargetTransformInfo::TCC_Free; |
| 1887 | } |
| 1888 | |
| 1889 | bool CallAnalyzer::visitCastInst(CastInst &I) { |
| 1890 | // Propagate constants through casts. |
| 1891 | if (simplifyInstruction(I)) |
| 1892 | return true; |
| 1893 | |
| 1894 | // Disable SROA in the face of arbitrary casts we don't explicitly list |
| 1895 | // elsewhere. |
| 1896 | disableSROA(V: I.getOperand(i_nocapture: 0)); |
| 1897 | |
| 1898 | // If this is a floating-point cast, and the target says this operation |
| 1899 | // is expensive, this may eventually become a library call. Treat the cost |
| 1900 | // as such. |
| 1901 | switch (I.getOpcode()) { |
| 1902 | case Instruction::FPTrunc: |
| 1903 | case Instruction::FPExt: |
| 1904 | case Instruction::UIToFP: |
| 1905 | case Instruction::SIToFP: |
| 1906 | case Instruction::FPToUI: |
| 1907 | case Instruction::FPToSI: |
| 1908 | if (TTI.getFPOpCost(Ty: I.getType()) == TargetTransformInfo::TCC_Expensive) |
| 1909 | onCallPenalty(); |
| 1910 | break; |
| 1911 | default: |
| 1912 | break; |
| 1913 | } |
| 1914 | |
| 1915 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1916 | TargetTransformInfo::TCC_Free; |
| 1917 | } |
| 1918 | |
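|      | /// Return true if the candidate call site carries attribute \p Attr on the
|      | /// parameter corresponding to callee argument \p A.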
| 1919 | bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { |
| 1920 | return CandidateCall.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attr); |
| 1921 | } |
| 1922 | |
| 1923 | bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { |
| 1924 | // Does the *call site* have the NonNull attribute set on an argument? We |
| 1925 | // use the attribute on the call site to memoize any analysis done in the |
| 1926 | // caller. This will also trip if the callee function has a non-null |
| 1927 | // parameter attribute, but that's a less interesting case because hopefully |
| 1928 | // the callee would already have been simplified based on that. |
| 1929 | if (Argument *A = dyn_cast<Argument>(Val: V)) |
| 1930 | if (paramHasAttr(A, Attr: Attribute::NonNull)) |
| 1931 | return true; |
| 1932 | |
| 1933 | // Is this an alloca in the caller? This is distinct from the attribute case |
| 1934 | // above because attributes aren't updated within the inliner itself and we |
| 1935 | // always want to catch the alloca derived case. |
| 1936 | if (isAllocaDerivedArg(V)) |
| 1937 | // We can actually predict the result of comparisons between an |
| 1938 | // alloca-derived value and null. Note that this fires regardless of |
| 1939 | // SROA firing. |
| 1940 | return true; |
| 1941 | |
| 1942 | return false; |
| 1943 | } |
| 1944 | |
| 1945 | bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { |
| 1946 | // If the normal destination of the invoke or the parent block of the call |
| 1947 | // site is unreachable-terminated, there is little point in inlining this |
| 1948 | // unless there is literally zero cost. |
| 1949 | // FIXME: Note that it is possible that an unreachable-terminated block has a |
| 1950 | // hot entry. For example, in the scenario below, inlining hot_call_X() may be
| 1951 | // beneficial:
| 1952 | // main() { |
| 1953 | // hot_call_1(); |
| 1954 | // ... |
| 1955 | // hot_call_N() |
| 1956 | // exit(0); |
| 1957 | // } |
| 1958 | // For now, we are not handling this corner case here as it is rare in real |
| 1959 | // code. In future, we should elaborate this based on BPI and BFI in more |
| 1960 | // general threshold adjusting heuristics in updateThreshold(). |
| 1961 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) { |
| 1962 | if (isa<UnreachableInst>(Val: II->getNormalDest()->getTerminator())) |
| 1963 | return false; |
| 1964 | } else if (isa<UnreachableInst>(Val: Call.getParent()->getTerminator())) |
| 1965 | return false; |
| 1966 | |
| 1967 | return true; |
| 1968 | } |
| 1969 | |
| 1970 | bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, |
| 1971 | BlockFrequencyInfo *CallerBFI) { |
| 1972 | // If global profile summary is available, then callsite's coldness is |
| 1973 | // determined based on that. |
| 1974 | if (PSI && PSI->hasProfileSummary()) |
| 1975 | return PSI->isColdCallSite(CB: Call, BFI: CallerBFI); |
| 1976 | |
| 1977 | // Otherwise we need BFI to be available. |
| 1978 | if (!CallerBFI) |
| 1979 | return false; |
| 1980 | |
| 1981 | // Determine if the callsite is cold relative to caller's entry. We could |
| 1982 | // potentially cache the computation of scaled entry frequency, but the added |
| 1983 | // complexity is not worth it unless this scaling shows up high in the |
| 1984 | // profiles. |
| 1985 | const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); |
| 1986 | auto CallSiteBB = Call.getParent(); |
| 1987 | auto CallSiteFreq = CallerBFI->getBlockFreq(BB: CallSiteBB); |
| 1988 | auto CallerEntryFreq = |
| 1989 | CallerBFI->getBlockFreq(BB: &(Call.getCaller()->getEntryBlock())); |
| 1990 | return CallSiteFreq < CallerEntryFreq * ColdProb; |
| 1991 | } |
| 1992 | |
| 1993 | std::optional<int> |
| 1994 | InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, |
| 1995 | BlockFrequencyInfo *CallerBFI) { |
| 1996 | |
| 1997 | // If global profile summary is available, then callsite's hotness is |
| 1998 | // determined based on that. |
| 1999 | if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CB: Call, BFI: CallerBFI)) |
| 2000 | return Params.HotCallSiteThreshold; |
| 2001 | |
| 2002 | // Otherwise we need BFI to be available and to have a locally hot callsite |
| 2003 | // threshold. |
| 2004 | if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) |
| 2005 | return std::nullopt; |
| 2006 | |
| 2007 | // Determine if the callsite is hot relative to caller's entry. We could |
| 2008 | // potentially cache the computation of scaled entry frequency, but the added |
| 2009 | // complexity is not worth it unless this scaling shows up high in the |
| 2010 | // profiles. |
| 2011 | const BasicBlock *CallSiteBB = Call.getParent(); |
| 2012 | BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(BB: CallSiteBB); |
| 2013 | BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq(); |
| 2014 | std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(Factor: HotCallSiteRelFreq); |
| 2015 | if (Limit && CallSiteFreq >= *Limit) |
| 2016 | return Params.LocallyHotCallSiteThreshold; |
| 2017 | |
| 2018 | // Otherwise treat it normally. |
| 2019 | return std::nullopt; |
| 2020 | } |
| 2021 | |
| 2022 | void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { |
| 2023 | // If no size growth is allowed for this inlining, set Threshold to 0. |
| 2024 | if (!allowSizeGrowth(Call)) { |
| 2025 | Threshold = 0; |
| 2026 | return; |
| 2027 | } |
| 2028 | |
| 2029 | Function *Caller = Call.getCaller(); |
| 2030 | |
| 2031 | // return min(A, B) if B is valid. |
| 2032 | auto MinIfValid = [](int A, std::optional<int> B) { |
| 2033 | return B ? std::min(a: A, b: *B) : A; |
| 2034 | }; |
| 2035 | |
| 2036 | // return max(A, B) if B is valid. |
| 2037 | auto MaxIfValid = [](int A, std::optional<int> B) { |
| 2038 | return B ? std::max(a: A, b: *B) : A; |
| 2039 | }; |
| 2040 | |
| 2041 | // Various bonus percentages. These are multiplied by Threshold to get the |
| 2042 | // bonus values. |
| 2043 | // SingleBBBonus: This bonus is applied if the callee has a single reachable |
| 2044 | // basic block at the given callsite context. This is speculatively applied |
| 2045 | // and withdrawn if more than one basic block is seen. |
| 2046 | // |
| 2047 | // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
| 2048 | // of the last call to a static function as inlining such functions is |
| 2049 | // guaranteed to reduce code size. |
| 2050 | // |
| 2051 | // These bonus percentages may be set to 0 based on properties of the caller |
| 2052 | // and the callsite. |
| 2053 | int SingleBBBonusPercent = 50; |
| 2054 | int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); |
| 2055 | int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus(); |
| 2056 | |
| 2057 | // Lambda to set all the above bonus and bonus percentages to 0. |
| 2058 | auto DisallowAllBonuses = [&]() { |
| 2059 | SingleBBBonusPercent = 0; |
| 2060 | VectorBonusPercent = 0; |
| 2061 | LastCallToStaticBonus = 0; |
| 2062 | }; |
| 2063 | |
| 2064 | // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available |
| 2065 | // and reduce the threshold if the caller has the necessary attribute. |
| 2066 | if (Caller->hasMinSize()) { |
| 2067 | Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); |
| 2068 | // For minsize, we want to disable the single BB bonus and the vector |
| 2069 | // bonuses, but not the last-call-to-static bonus. Inlining the last call to |
| 2070 | // a static function will, at the minimum, eliminate the parameter setup and |
| 2071 | // call/return instructions. |
| 2072 | SingleBBBonusPercent = 0; |
| 2073 | VectorBonusPercent = 0; |
| 2074 | } else if (Caller->hasOptSize()) |
| 2075 | Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); |
| 2076 | |
| 2077 | // Adjust the threshold based on inlinehint attribute and profile based |
| 2078 | // hotness information if the caller does not have MinSize attribute. |
| 2079 | if (!Caller->hasMinSize()) { |
| 2080 | if (Callee.hasFnAttribute(Kind: Attribute::InlineHint)) |
| 2081 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); |
| 2082 | |
| 2083 | // FIXME: After switching to the new passmanager, simplify the logic below |
| 2084 | // by checking only the callsite hotness/coldness as we will reliably |
| 2085 | // have local profile information. |
| 2086 | // |
| 2087 | // Callsite hotness and coldness can be determined if sample profile is |
| 2088 | // used (which adds hotness metadata to calls) or if caller's |
| 2089 | // BlockFrequencyInfo is available. |
| 2090 | BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr; |
| 2091 | auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); |
| 2092 | if (!Caller->hasOptSize() && HotCallSiteThreshold) { |
| 2093 | LLVM_DEBUG(dbgs() << "Hot callsite.\n" ); |
| 2094 | // FIXME: This should update the threshold only if it exceeds the |
| 2095 | // current threshold, but AutoFDO + ThinLTO currently relies on this |
| 2096 | // behavior to prevent inlining of hot callsites during ThinLTO |
| 2097 | // compile phase. |
| 2098 | Threshold = *HotCallSiteThreshold; |
| 2099 | } else if (isColdCallSite(Call, CallerBFI)) { |
| 2100 | LLVM_DEBUG(dbgs() << "Cold callsite.\n" ); |
| 2101 | // Do not apply bonuses for a cold callsite including the |
| 2102 | // LastCallToStatic bonus. While this bonus might result in code size |
| 2103 | // reduction, it can cause the size of a non-cold caller to increase,
| 2104 | // preventing it from being inlined. |
| 2105 | DisallowAllBonuses(); |
| 2106 | Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); |
| 2107 | } else if (PSI) { |
| 2108 | // Use callee's global profile information only if we have no way of |
| 2109 | // determining this via callsite information. |
| 2110 | if (PSI->isFunctionEntryHot(F: &Callee)) { |
| 2111 | LLVM_DEBUG(dbgs() << "Hot callee.\n" ); |
| 2112 | // If callsite hotness can not be determined, we may still know |
| 2113 | // that the callee is hot and treat it as a weaker hint for threshold |
| 2114 | // increase. |
| 2115 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); |
| 2116 | } else if (PSI->isFunctionEntryCold(F: &Callee)) { |
| 2117 | LLVM_DEBUG(dbgs() << "Cold callee.\n" ); |
| 2118 | // Do not apply bonuses for a cold callee including the |
| 2119 | // LastCallToStatic bonus. While this bonus might result in code size |
| 2120 | // reduction, it can cause the size of a non-cold caller to increase,
| 2121 | // preventing it from being inlined. |
| 2122 | DisallowAllBonuses(); |
| 2123 | Threshold = MinIfValid(Threshold, Params.ColdThreshold); |
| 2124 | } |
| 2125 | } |
| 2126 | } |
| 2127 | |
| 2128 | Threshold += TTI.adjustInliningThreshold(CB: &Call); |
| 2129 | |
| 2130 | // Finally, take the target-specific inlining threshold multiplier into |
| 2131 | // account. |
| 2132 | Threshold *= TTI.getInliningThresholdMultiplier(); |
| 2133 | |
| 2134 | SingleBBBonus = Threshold * SingleBBBonusPercent / 100; |
| 2135 | VectorBonus = Threshold * VectorBonusPercent / 100; |
| 2136 | |
| 2137 | // If there is only one call of the function, and it has internal linkage, |
| 2138 | // the cost of inlining it drops dramatically. It may seem odd to update |
| 2139 | // Cost in updateThreshold, but the bonus depends on the logic in this method. |
| 2140 | if (isSoleCallToLocalFunction(CB: Call, Callee: F)) { |
| 2141 | Cost -= LastCallToStaticBonus; |
| 2142 | StaticBonusApplied = LastCallToStaticBonus; |
| 2143 | } |
| 2144 | } |
| 2145 | |
| 2146 | bool CallAnalyzer::visitCmpInst(CmpInst &I) { |
| 2147 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2148 | // First try to handle simplified comparisons. |
| 2149 | if (simplifyInstruction(I)) |
| 2150 | return true; |
| 2151 | |
| 2152 | // Try to handle comparison that can be simplified using ValueTracking. |
| 2153 | if (simplifyCmpInstForRecCall(Cmp&: I)) |
| 2154 | return true; |
| 2155 | |
| 2156 | if (I.getOpcode() == Instruction::FCmp) |
| 2157 | return false; |
| 2158 | |
| 2159 | // Otherwise look for a comparison between constant offset pointers with |
| 2160 | // a common base. |
| 2161 | Value *LHSBase, *RHSBase; |
| 2162 | APInt LHSOffset, RHSOffset; |
| 2163 | std::tie(args&: LHSBase, args&: LHSOffset) = ConstantOffsetPtrs.lookup(Val: LHS); |
| 2164 | if (LHSBase) { |
| 2165 | std::tie(args&: RHSBase, args&: RHSOffset) = ConstantOffsetPtrs.lookup(Val: RHS); |
| 2166 | if (RHSBase && LHSBase == RHSBase) { |
| 2167 | // We have common bases, fold the icmp to a constant based on the |
| 2168 | // offsets. |
| 2169 | SimplifiedValues[&I] = ConstantInt::getBool( |
| 2170 | Ty: I.getType(), |
| 2171 | V: ICmpInst::compare(LHS: LHSOffset, RHS: RHSOffset, Pred: I.getPredicate())); |
| 2172 | ++NumConstantPtrCmps; |
| 2173 | return true; |
| 2174 | } |
| 2175 | } |
| 2176 | |
| 2177 | auto isImplicitNullCheckCmp = [](const CmpInst &I) { |
| 2178 | for (auto *User : I.users()) |
| 2179 | if (auto *Instr = dyn_cast<Instruction>(Val: User)) |
| 2180 | if (!Instr->getMetadata(KindID: LLVMContext::MD_make_implicit)) |
| 2181 | return false; |
| 2182 | return true; |
| 2183 | }; |
| 2184 | |
| 2185 | // If the comparison is an equality comparison with null, we can simplify it |
| 2186 | // if we know the value (argument) can't be null |
| 2187 | if (I.isEquality() && isa<ConstantPointerNull>(Val: I.getOperand(i_nocapture: 1))) { |
| 2188 | if (isKnownNonNullInCallee(V: I.getOperand(i_nocapture: 0))) { |
| 2189 | bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; |
| 2190 | SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(Ty: I.getType()) |
| 2191 | : ConstantInt::getFalse(Ty: I.getType()); |
| 2192 | return true; |
| 2193 | } |
| 2194 | // Implicit null checks act as unconditional branches and their comparisons |
| 2195 | // should be treated as simplified and free of cost. |
| 2196 | if (isImplicitNullCheckCmp(I)) |
| 2197 | return true; |
| 2198 | } |
| 2199 | return handleSROA(V: I.getOperand(i_nocapture: 0), DoNotDisable: isa<ConstantPointerNull>(Val: I.getOperand(i_nocapture: 1))); |
| 2200 | } |
| 2201 | |
| 2202 | bool CallAnalyzer::visitSub(BinaryOperator &I) { |
| 2203 | // Try to handle a special case: we can fold computing the difference of two |
| 2204 | // constant-related pointers. |
| 2205 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2206 | Value *LHSBase, *RHSBase; |
| 2207 | APInt LHSOffset, RHSOffset; |
| 2208 | std::tie(args&: LHSBase, args&: LHSOffset) = ConstantOffsetPtrs.lookup(Val: LHS); |
| 2209 | if (LHSBase) { |
| 2210 | std::tie(args&: RHSBase, args&: RHSOffset) = ConstantOffsetPtrs.lookup(Val: RHS); |
| 2211 | if (RHSBase && LHSBase == RHSBase) { |
| 2212 | // We have common bases, fold the subtract to a constant based on the |
| 2213 | // offsets. |
| 2214 | Constant *CLHS = ConstantInt::get(Context&: LHS->getContext(), V: LHSOffset); |
| 2215 | Constant *CRHS = ConstantInt::get(Context&: RHS->getContext(), V: RHSOffset); |
| 2216 | if (Constant *C = ConstantExpr::getSub(C1: CLHS, C2: CRHS)) { |
| 2217 | SimplifiedValues[&I] = C; |
| 2218 | ++NumConstantPtrDiffs; |
| 2219 | return true; |
| 2220 | } |
| 2221 | } |
| 2222 | } |
| 2223 | |
| 2224 | // Otherwise, fall back to the generic logic for simplifying and handling |
| 2225 | // instructions. |
| 2226 | return Base::visitSub(I); |
| 2227 | } |
| 2228 | |
| 2229 | bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { |
| 2230 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2231 | Constant *CLHS = getDirectOrSimplifiedValue<Constant>(V: LHS); |
| 2232 | Constant *CRHS = getDirectOrSimplifiedValue<Constant>(V: RHS); |
| 2233 | |
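|      | // Try to fold the operation, substituting any operands already simplified
|      | // to constants.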
| 2234 | Value *SimpleV = nullptr; |
| 2235 | if (auto FI = dyn_cast<FPMathOperator>(Val: &I)) |
| 2236 | SimpleV = simplifyBinOp(Opcode: I.getOpcode(), LHS: CLHS ? CLHS : LHS, RHS: CRHS ? CRHS : RHS, |
| 2237 | FMF: FI->getFastMathFlags(), Q: DL); |
| 2238 | else |
| 2239 | SimpleV = |
| 2240 | simplifyBinOp(Opcode: I.getOpcode(), LHS: CLHS ? CLHS : LHS, RHS: CRHS ? CRHS : RHS, Q: DL); |
| 2241 | |
| 2242 | if (Constant *C = dyn_cast_or_null<Constant>(Val: SimpleV)) |
| 2243 | SimplifiedValues[&I] = C; |
| 2244 | |
| 2245 | if (SimpleV) |
| 2246 | return true; |
| 2247 | |
| 2248 | // Disable any SROA on arguments to arbitrary, unsimplified binary operators. |
| 2249 | disableSROA(V: LHS); |
| 2250 | disableSROA(V: RHS); |
| 2251 | |
| 2252 | // If the instruction is floating point, and the target says this operation |
| 2253 | // is expensive, this may eventually become a library call. Treat the cost |
| 2254 | // as such. Unless it's fneg which can be implemented with an xor. |
| 2255 | using namespace llvm::PatternMatch; |
| 2256 | if (I.getType()->isFloatingPointTy() && |
| 2257 | TTI.getFPOpCost(Ty: I.getType()) == TargetTransformInfo::TCC_Expensive && |
| 2258 | !match(V: &I, P: m_FNeg(X: m_Value()))) |
| 2259 | onCallPenalty(); |
| 2260 | |
| 2261 | return false; |
| 2262 | } |
| 2263 | |
| 2264 | bool CallAnalyzer::visitFNeg(UnaryOperator &I) { |
| 2265 | Value *Op = I.getOperand(i_nocapture: 0); |
| 2266 | Constant *COp = getDirectOrSimplifiedValue<Constant>(V: Op); |
| 2267 | |
| 2268 | Value *SimpleV = simplifyFNegInst( |
| 2269 | Op: COp ? COp : Op, FMF: cast<FPMathOperator>(Val&: I).getFastMathFlags(), Q: DL); |
| 2270 | |
| 2271 | if (Constant *C = dyn_cast_or_null<Constant>(Val: SimpleV)) |
| 2272 | SimplifiedValues[&I] = C; |
| 2273 | |
| 2274 | if (SimpleV) |
| 2275 | return true; |
| 2276 | |
| 2277 | // Disable any SROA on arguments to arbitrary, unsimplified fneg. |
| 2278 | disableSROA(V: Op); |
| 2279 | |
| 2280 | return false; |
| 2281 | } |
| 2282 | |
| 2283 | bool CallAnalyzer::visitLoad(LoadInst &I) { |
| 2284 | if (handleSROA(V: I.getPointerOperand(), DoNotDisable: I.isSimple())) |
| 2285 | return true; |
| 2286 | |
| 2287 | // If the data is already loaded from this address and hasn't been clobbered |
| 2288 | // by any stores or calls, this load is likely to be redundant and can be |
| 2289 | // eliminated. |
| 2290 | if (EnableLoadElimination && |
| 2291 | !LoadAddrSet.insert(Ptr: I.getPointerOperand()).second && I.isUnordered()) { |
| 2292 | onLoadEliminationOpportunity(); |
| 2293 | return true; |
| 2294 | } |
| 2295 | |
| 2296 | onMemAccess(); |
| 2297 | return false; |
| 2298 | } |
| 2299 | |
| 2300 | bool CallAnalyzer::visitStore(StoreInst &I) { |
| 2301 | if (handleSROA(V: I.getPointerOperand(), DoNotDisable: I.isSimple())) |
| 2302 | return true; |
| 2303 | |
| 2304 | // The store can potentially clobber loads and prevent repeated loads from |
| 2305 | // being eliminated. |
| 2306 | // FIXME: |
| 2307 | // 1. We can probably keep an initial set of eliminatable loads subtracted
| 2308 | // from the cost even when we finally see a store. We just need to disable |
| 2309 | // *further* accumulation of elimination savings. |
| 2310 | // 2. We should probably at some point thread MemorySSA for the callee into |
| 2311 | // this and then use that to actually compute *really* precise savings. |
| 2312 | disableLoadElimination(); |
| 2313 | |
| 2314 | onMemAccess(); |
| 2315 | return false; |
| 2316 | } |
| 2317 | |
| 2318 | bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
| 2319 | Value *Op = I.getAggregateOperand(); |
| 2320 | |
| 2321 | // Special handling, because we want to simplify extractvalue with a |
| 2322 | // potential insertvalue from the caller. |
| 2323 | if (Value *SimpleOp = getSimplifiedValueUnchecked(V: Op)) { |
| 2324 | SimplifyQuery SQ(DL); |
| 2325 | Value *SimpleV = simplifyExtractValueInst(Agg: SimpleOp, Idxs: I.getIndices(), Q: SQ); |
| 2326 | if (SimpleV) { |
| 2327 | SimplifiedValues[&I] = SimpleV; |
| 2328 | return true; |
| 2329 | } |
| 2330 | } |
| 2331 | |
| 2332 | // SROA can't look through these, but they may be free. |
| 2333 | return Base::visitExtractValue(I); |
| 2334 | } |
| 2335 | |
| 2336 | bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { |
| 2337 | // Constant folding for insert value is trivial. |
| 2338 | if (simplifyInstruction(I)) |
| 2339 | return true; |
| 2340 | |
| 2341 | // SROA can't look through these, but they may be free. |
| 2342 | return Base::visitInsertValue(I); |
| 2343 | } |
| 2344 | |
| 2345 | /// Try to simplify a call site. |
| 2346 | /// |
| 2347 | /// Takes a concrete function and callsite and tries to actually simplify it by |
| 2348 | /// analyzing the arguments and call itself with instsimplify. Returns true if |
| 2349 | /// it has simplified the callsite to some other entity (a constant), making it |
| 2350 | /// free. |
| 2351 | bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { |
| 2352 | // FIXME: Using the instsimplify logic directly for this is inefficient |
| 2353 | // because we have to continually rebuild the argument list even when no |
| 2354 | // simplifications can be performed. Until that is fixed with remapping |
| 2355 | // inside of instsimplify, directly constant fold calls here. |
| 2356 | if (!canConstantFoldCallTo(Call: &Call, F)) |
| 2357 | return false; |
| 2358 | |
| 2359 | // Try to re-map the arguments to constants. |
| 2360 | SmallVector<Constant *, 4> ConstantArgs; |
| 2361 | ConstantArgs.reserve(N: Call.arg_size()); |
| 2362 | for (Value *I : Call.args()) { |
| 2363 | Constant *C = getDirectOrSimplifiedValue<Constant>(V: I); |
| 2364 | if (!C) |
| 2365 | return false; // This argument doesn't map to a constant. |
| 2366 | |
| 2367 | ConstantArgs.push_back(Elt: C); |
| 2368 | } |
| 2369 | if (Constant *C = ConstantFoldCall(Call: &Call, F, Operands: ConstantArgs)) { |
| 2370 | SimplifiedValues[&Call] = C; |
| 2371 | return true; |
| 2372 | } |
| 2373 | |
| 2374 | return false; |
| 2375 | } |
| 2376 | |
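|      | /// Return true if a call to \p F is expected to be lowered to an actual
|      | /// call instruction. Fortified memory libcalls whose length is known to fit
|      | /// the destination will be expanded inline and are not treated as calls.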
| 2377 | bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) { |
| 2378 | const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr; |
| 2379 | LibFunc LF; |
| 2380 | if (!TLI || !TLI->getLibFunc(FDecl: *F, F&: LF) || !TLI->has(F: LF)) |
| 2381 | return TTI.isLoweredToCall(F); |
| 2382 | |
| 2383 | switch (LF) { |
| 2384 | case LibFunc_memcpy_chk: |
| 2385 | case LibFunc_memmove_chk: |
| 2386 | case LibFunc_mempcpy_chk: |
| 2387 | case LibFunc_memset_chk: { |
| 2388 | // Calls to __memcpy_chk whose length is known to fit within the object |
| 2389 | // size will eventually be replaced by inline stores. Therefore, these |
| 2390 | // should not incur a call penalty. This is only really relevant on |
| 2391 | // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as |
| 2392 | // other platforms use memcpy intrinsics, which are already exempt from the |
| 2393 | // call penalty. |
| 2394 | auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(V: Call.getOperand(i_nocapture: 2)); |
| 2395 | auto *ObjSizeOp = |
| 2396 | getDirectOrSimplifiedValue<ConstantInt>(V: Call.getOperand(i_nocapture: 3)); |
| 2397 | if (LenOp && ObjSizeOp && |
| 2398 | LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) { |
| 2399 | return false; |
| 2400 | } |
| 2401 | break; |
| 2402 | } |
| 2403 | default: |
| 2404 | break; |
| 2405 | } |
| 2406 | |
| 2407 | return TTI.isLoweredToCall(F); |
| 2408 | } |
| 2409 | |
| 2410 | bool CallAnalyzer::visitCallBase(CallBase &Call) { |
| 2411 | if (!onCallBaseVisitStart(Call)) |
| 2412 | return true; |
| 2413 | |
| 2414 | if (Call.hasFnAttr(Kind: Attribute::ReturnsTwice) && |
| 2415 | !F.hasFnAttribute(Kind: Attribute::ReturnsTwice)) { |
| 2416 | // This aborts the entire analysis. |
| 2417 | ExposesReturnsTwice = true; |
| 2418 | return false; |
| 2419 | } |
| 2420 | if (isa<CallInst>(Val: Call) && cast<CallInst>(Val&: Call).cannotDuplicate()) |
| 2421 | ContainsNoDuplicateCall = true; |
| 2422 | |
| 2423 | Function *F = Call.getCalledFunction(); |
| 2424 | bool IsIndirectCall = !F; |
| 2425 | if (IsIndirectCall) { |
| 2426 | // Check if this happens to be an indirect function call to a known function |
| 2427 | // in this inline context. If not, we've done all we can. |
| 2428 | Value *Callee = Call.getCalledOperand(); |
| 2429 | F = getSimplifiedValue<Function>(V: Callee); |
| 2430 | if (!F || F->getFunctionType() != Call.getFunctionType()) { |
| 2431 | onCallArgumentSetup(Call); |
| 2432 | |
| 2433 | if (!Call.onlyReadsMemory()) |
| 2434 | disableLoadElimination(); |
| 2435 | return Base::visitCallBase(I&: Call); |
| 2436 | } |
| 2437 | } |
| 2438 | |
| 2439 | assert(F && "Expected a call to a known function" ); |
| 2440 | |
| 2441 | // When we have a concrete function, first try to simplify it directly. |
| 2442 | if (simplifyCallSite(F, Call)) |
| 2443 | return true; |
| 2444 | |
| 2445 | // Next check if it is an intrinsic we know about. |
| 2446 | // FIXME: Lift this into part of the InstVisitor. |
| 2447 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &Call)) { |
| 2448 | switch (II->getIntrinsicID()) { |
| 2449 | default: |
| 2450 | if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(I: II)) |
| 2451 | disableLoadElimination(); |
| 2452 | return Base::visitCallBase(I&: Call); |
| 2453 | |
| 2454 | case Intrinsic::load_relative: |
| 2455 | onLoadRelativeIntrinsic(); |
| 2456 | return false; |
| 2457 | |
| 2458 | case Intrinsic::memset: |
| 2459 | case Intrinsic::memcpy: |
| 2460 | case Intrinsic::memmove: |
| 2461 | disableLoadElimination(); |
| 2462 | // SROA can usually chew through these intrinsics, but they aren't free. |
| 2463 | return false; |
| 2464 | case Intrinsic::icall_branch_funnel: |
| 2465 | case Intrinsic::localescape: |
| 2466 | HasUninlineableIntrinsic = true; |
| 2467 | return false; |
| 2468 | case Intrinsic::vastart: |
| 2469 | InitsVargArgs = true; |
| 2470 | return false; |
| 2471 | case Intrinsic::launder_invariant_group: |
| 2472 | case Intrinsic::strip_invariant_group: |
| 2473 | if (auto *SROAArg = getSROAArgForValueOrNull(V: II->getOperand(i_nocapture: 0))) |
| 2474 | SROAArgValues[II] = SROAArg; |
| 2475 | return true; |
| 2476 | case Intrinsic::is_constant: |
| 2477 | return simplifyIntrinsicCallIsConstant(CB&: Call); |
| 2478 | case Intrinsic::objectsize: |
| 2479 | return simplifyIntrinsicCallObjectSize(CB&: Call); |
| 2480 | } |
| 2481 | } |
| 2482 | |
| 2483 | if (F == Call.getFunction()) { |
| 2484 | // This flag will fully abort the analysis, so don't bother with anything |
| 2485 | // else. |
| 2486 | IsRecursiveCall = true; |
| 2487 | if (!AllowRecursiveCall) |
| 2488 | return false; |
| 2489 | } |
| 2490 | |
| 2491 | if (isLoweredToCall(F, Call)) { |
| 2492 | onLoweredCall(F, Call, IsIndirectCall); |
| 2493 | } |
| 2494 | |
| 2495 | if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) |
| 2496 | disableLoadElimination(); |
| 2497 | return Base::visitCallBase(I&: Call); |
| 2498 | } |
| 2499 | |
| 2500 | bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { |
| 2501 | // At least one return instruction will be free after inlining. |
| 2502 | bool Free = !HasReturn; |
| 2503 | HasReturn = true; |
| 2504 | return Free; |
| 2505 | } |
| 2506 | |
| 2507 | bool CallAnalyzer::visitBranchInst(BranchInst &BI) { |
| 2508 | // We model unconditional branches as essentially free -- they really |
| 2509 | // shouldn't exist at all, but handling them makes the behavior of the |
| 2510 | // inliner more regular and predictable. Interestingly, conditional branches |
| 2511 | // which will fold away are also free. |
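| | // For example (illustrative): if the branch condition is a callee parameter |
| | // that this call site passes as the constant true, the branch is treated as |
| | // unconditional and therefore free. |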
| 2512 | return BI.isUnconditional() || |
| 2513 | getDirectOrSimplifiedValue<ConstantInt>(V: BI.getCondition()) || |
| 2514 | BI.getMetadata(KindID: LLVMContext::MD_make_implicit); |
| 2515 | } |
| 2516 | |
| 2517 | bool CallAnalyzer::visitSelectInst(SelectInst &SI) { |
| 2518 | bool CheckSROA = SI.getType()->isPointerTy(); |
| 2519 | Value *TrueVal = SI.getTrueValue(); |
| 2520 | Value *FalseVal = SI.getFalseValue(); |
| 2521 | |
| 2522 | Constant *TrueC = getDirectOrSimplifiedValue<Constant>(V: TrueVal); |
| 2523 | Constant *FalseC = getDirectOrSimplifiedValue<Constant>(V: FalseVal); |
| 2524 | Constant *CondC = getSimplifiedValue<Constant>(V: SI.getCondition()); |
| 2525 | |
| 2526 | if (!CondC) { |
| 2527 | // Select C, X, X => X |
| 2528 | if (TrueC == FalseC && TrueC) { |
| 2529 | SimplifiedValues[&SI] = TrueC; |
| 2530 | return true; |
| 2531 | } |
| 2532 | |
| 2533 | if (!CheckSROA) |
| 2534 | return Base::visitSelectInst(I&: SI); |
| 2535 | |
| 2536 | std::pair<Value *, APInt> TrueBaseAndOffset = |
| 2537 | ConstantOffsetPtrs.lookup(Val: TrueVal); |
| 2538 | std::pair<Value *, APInt> FalseBaseAndOffset = |
| 2539 | ConstantOffsetPtrs.lookup(Val: FalseVal); |
| 2540 | if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { |
| 2541 | ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; |
| 2542 | |
| 2543 | if (auto *SROAArg = getSROAArgForValueOrNull(V: TrueVal)) |
| 2544 | SROAArgValues[&SI] = SROAArg; |
| 2545 | return true; |
| 2546 | } |
| 2547 | |
| 2548 | return Base::visitSelectInst(I&: SI); |
| 2549 | } |
| 2550 | |
| 2551 | // Select condition is a constant. |
| 2552 | Value *SelectedV = CondC->isAllOnesValue() ? TrueVal |
| 2553 | : (CondC->isNullValue()) ? FalseVal |
| 2554 | : nullptr; |
| 2555 | if (!SelectedV) { |
| 2556 | // Condition is a vector constant that is not all 1s or all 0s. If all |
| 2557 | // operands are constants, ConstantFoldSelectInstruction() can handle |
| 2558 | // cases such as vector selects. |
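| | // For example (illustrative): |
| | //   select <2 x i1> <i1 true, i1 false>, <2 x i32> <i32 1, i32 1>, |
| | //                                        <2 x i32> <i32 2, i32 2> |
| | // folds to <2 x i32> <i32 1, i32 2>. |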
| 2559 | if (TrueC && FalseC) { |
| 2560 | if (auto *C = ConstantFoldSelectInstruction(Cond: CondC, V1: TrueC, V2: FalseC)) { |
| 2561 | SimplifiedValues[&SI] = C; |
| 2562 | return true; |
| 2563 | } |
| 2564 | } |
| 2565 | return Base::visitSelectInst(I&: SI); |
| 2566 | } |
| 2567 | |
| 2568 | // Condition is either all 1s or all 0s. SI can be simplified. |
| 2569 | if (Constant *SelectedC = dyn_cast<Constant>(Val: SelectedV)) { |
| 2570 | SimplifiedValues[&SI] = SelectedC; |
| 2571 | return true; |
| 2572 | } |
| 2573 | |
| 2574 | if (!CheckSROA) |
| 2575 | return true; |
| 2576 | |
| 2577 | std::pair<Value *, APInt> BaseAndOffset = |
| 2578 | ConstantOffsetPtrs.lookup(Val: SelectedV); |
| 2579 | if (BaseAndOffset.first) { |
| 2580 | ConstantOffsetPtrs[&SI] = BaseAndOffset; |
| 2581 | |
| 2582 | if (auto *SROAArg = getSROAArgForValueOrNull(V: SelectedV)) |
| 2583 | SROAArgValues[&SI] = SROAArg; |
| 2584 | } |
| 2585 | |
| 2586 | return true; |
| 2587 | } |
| 2588 | |
| 2589 | bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { |
| 2590 | // We model switches with a constant (hence effectively unconditional) |
| 2591 | // condition as free; see the comments on handling branches. |
| 2592 | if (getDirectOrSimplifiedValue<ConstantInt>(V: SI.getCondition())) |
| 2593 | return true; |
| 2594 | |
| 2595 | // Assume the most general case where the switch is lowered into |
| 2596 | // either a jump table, bit test, or a balanced binary tree consisting of |
| 2597 | // case clusters without merging adjacent clusters with the same |
| 2598 | // destination. We do not consider switches that are lowered with a mix of |
| 2599 | // jump table/bit test/binary search tree. The cost of the switch is |
| 2600 | // proportional to the size of the tree or the size of the jump table range. |
| 2601 | // |
| 2602 | // NB: In simplifycfg we convert large switches that are just used to |
| 2603 | // initialize large phi nodes into lookup tables, so this shouldn't prevent |
| 2604 | // inlining those. It will prevent inlining in cases where that optimization |
| 2605 | // does not (yet) fire. |
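| | // For example (illustrative): a dense switch over a contiguous range of |
| | // values is likely lowered to a jump table and charged in proportion to that |
| | // range, while a sparse switch with a few widely spaced cases is charged for |
| | // a balanced binary tree over its case clusters. |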
| 2606 | |
| 2607 | unsigned JumpTableSize = 0; |
| 2608 | BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; |
| 2609 | unsigned NumCaseCluster = |
| 2610 | TTI.getEstimatedNumberOfCaseClusters(SI, JTSize&: JumpTableSize, PSI, BFI); |
| 2611 | |
| 2612 | onFinalizeSwitch(JumpTableSize, NumCaseCluster, DefaultDestUnreachable: SI.defaultDestUnreachable()); |
| 2613 | return false; |
| 2614 | } |
| 2615 | |
| 2616 | bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { |
| 2617 | // We never want to inline functions that contain an indirectbr. Inlining |
| 2618 | // them would be incorrect because all the blockaddresses (in static global |
| 2619 | // initializers for example) would still refer to the original function, and |
| 2620 | // the indirect jump would jump from the inlined copy of the function into |
| 2621 | // the original function, which is undefined behavior. |
| 2622 | // FIXME: This logic isn't really right; we can safely inline functions with |
| 2623 | // indirectbr's as long as no other function or global references the |
| 2624 | // blockaddress of a block within the current function. |
| 2625 | HasIndirectBr = true; |
| 2626 | return false; |
| 2627 | } |
| 2628 | |
| 2629 | bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { |
| 2630 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2631 | // the inline cost of a resume instruction. |
| 2632 | return false; |
| 2633 | } |
| 2634 | |
| 2635 | bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { |
| 2636 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2637 | // the inline cost of a cleanupret instruction. |
| 2638 | return false; |
| 2639 | } |
| 2640 | |
| 2641 | bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { |
| 2642 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2643 | // the inline cost of a catchret instruction. |
| 2644 | return false; |
| 2645 | } |
| 2646 | |
| 2647 | bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { |
| 2648 | // FIXME: It might be reasonable to discount the cost of instructions leading |
| 2649 | // to unreachable as they have the lowest possible impact on both runtime and |
| 2650 | // code size. |
| 2651 | return true; // No actual code is needed for unreachable. |
| 2652 | } |
| 2653 | |
| 2654 | bool CallAnalyzer::visitInstruction(Instruction &I) { |
| 2655 | // Some instructions are free. All of the free intrinsics can also be |
| 2656 | // handled by SROA, etc. |
| 2657 | if (TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 2658 | TargetTransformInfo::TCC_Free) |
| 2659 | return true; |
| 2660 | |
| 2661 | // We found something we don't understand or can't handle. Mark any SROA-able |
| 2662 | // values in the operand list as no longer viable. |
| 2663 | for (const Use &Op : I.operands()) |
| 2664 | disableSROA(V: Op); |
| 2665 | |
| 2666 | return false; |
| 2667 | } |
| 2668 | |
| 2669 | /// Analyze a basic block for its contribution to the inline cost. |
| 2670 | /// |
| 2671 | /// This method walks the analyzer over every instruction in the given basic |
| 2672 | /// block and accounts for their cost during inlining at this callsite. It |
| 2673 | /// aborts early if the threshold has been exceeded or an impossible-to-inline |
| 2674 | /// construct has been detected. It returns a failure result if inlining is no |
| 2675 | /// longer viable, and a success result if inlining remains viable. |
| 2676 | InlineResult |
| 2677 | CallAnalyzer::analyzeBlock(BasicBlock *BB, |
| 2678 | const SmallPtrSetImpl<const Value *> &EphValues) { |
| 2679 | for (Instruction &I : *BB) { |
| 2680 | // FIXME: Currently, the number of instructions in a function, regardless of |
| 2681 | // our ability to simplify them during inlining to constants or dead code, |
| 2682 | // is actually used by the vector bonus heuristic. As long as that's true, |
| 2683 | // we have to special-case debug intrinsics here to prevent differences in |
| 2684 | // inlining due to debug symbols. Eventually, the number of unsimplified |
| 2685 | // instructions shouldn't factor into the cost computation, but until then, |
| 2686 | // hack around it here. |
| 2687 | // Similarly, skip pseudo-probes. |
| 2688 | if (I.isDebugOrPseudoInst()) |
| 2689 | continue; |
| 2690 | |
| 2691 | // Skip ephemeral values. |
| 2692 | if (EphValues.count(Ptr: &I)) |
| 2693 | continue; |
| 2694 | |
| 2695 | ++NumInstructions; |
| 2696 | if (isa<ExtractElementInst>(Val: I) || I.getType()->isVectorTy()) |
| 2697 | ++NumVectorInstructions; |
| 2698 | |
| 2699 | // If the instruction simplified to a constant, there is no cost to this |
| 2700 | // instruction. Visit the instructions using our InstVisitor to account for |
| 2701 | // all of the per-instruction logic. The visit tree returns true if we |
| 2702 | // consumed the instruction in any way, and false if the instruction's base |
| 2703 | // cost should count against inlining. |
| 2704 | onInstructionAnalysisStart(I: &I); |
| 2705 | |
| 2706 | if (Base::visit(I: &I)) |
| 2707 | ++NumInstructionsSimplified; |
| 2708 | else |
| 2709 | onMissedSimplification(); |
| 2710 | |
| 2711 | onInstructionAnalysisFinish(I: &I); |
| 2712 | using namespace ore; |
| 2713 | // If visiting this instruction detected an uninlinable pattern, abort. |
| 2714 | InlineResult IR = InlineResult::success(); |
| 2715 | if (IsRecursiveCall && !AllowRecursiveCall) |
| 2716 | IR = InlineResult::failure(Reason: "recursive" ); |
| 2717 | else if (ExposesReturnsTwice) |
| 2718 | IR = InlineResult::failure(Reason: "exposes returns twice" ); |
| 2719 | else if (HasDynamicAlloca) |
| 2720 | IR = InlineResult::failure(Reason: "dynamic alloca" ); |
| 2721 | else if (HasIndirectBr) |
| 2722 | IR = InlineResult::failure(Reason: "indirect branch" ); |
| 2723 | else if (HasUninlineableIntrinsic) |
| 2724 | IR = InlineResult::failure(Reason: "uninlinable intrinsic" ); |
| 2725 | else if (InitsVargArgs) |
| 2726 | IR = InlineResult::failure(Reason: "varargs" ); |
| 2727 | if (!IR.isSuccess()) { |
| 2728 | if (ORE) |
| 2729 | ORE->emit(RemarkBuilder: [&]() { |
| 2730 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline" , |
| 2731 | &CandidateCall) |
| 2732 | << NV("Callee" , &F) << " has uninlinable pattern (" |
| 2733 | << NV("InlineResult" , IR.getFailureReason()) |
| 2734 | << ") and cost is not fully computed" ; |
| 2735 | }); |
| 2736 | return IR; |
| 2737 | } |
| 2738 | |
| 2739 | // If the caller is a recursive function, then we don't want to inline |
| 2740 | // functions which allocate a lot of stack space, because it would increase |
| 2741 | // the caller's stack usage dramatically. |
| 2742 | if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) { |
| 2743 | auto IR = |
| 2744 | InlineResult::failure(Reason: "recursive and allocates too much stack space" ); |
| 2745 | if (ORE) |
| 2746 | ORE->emit(RemarkBuilder: [&]() { |
| 2747 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline" , |
| 2748 | &CandidateCall) |
| 2749 | << NV("Callee" , &F) << " is " |
| 2750 | << NV("InlineResult" , IR.getFailureReason()) |
| 2751 | << ". Cost is not fully computed" ; |
| 2752 | }); |
| 2753 | return IR; |
| 2754 | } |
| 2755 | |
| 2756 | if (shouldStop()) |
| 2757 | return InlineResult::failure( |
| 2758 | Reason: "Call site analysis is not favorable to inlining." ); |
| 2759 | } |
| 2760 | |
| 2761 | return InlineResult::success(); |
| 2762 | } |
| 2763 | |
| 2764 | /// Compute the base pointer and cumulative constant offsets for V. |
| 2765 | /// |
| 2766 | /// This strips all constant offsets off of V, leaving it as the base pointer, |
| 2767 | /// and accumulates the total constant offset applied in the returned constant. |
| 2768 | /// It returns nullptr if V is not a pointer, and returns the constant '0' if |
| 2769 | /// there are no constant offsets applied. |
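| | /// For example (illustrative): if V is an inbounds GEP adding a constant |
| | /// offset of 4 to another inbounds GEP adding 8 to a pointer %p, then V is |
| | /// rewritten to %p and the returned constant is 12. |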
| 2770 | ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { |
| 2771 | if (!V->getType()->isPointerTy()) |
| 2772 | return nullptr; |
| 2773 | |
| 2774 | unsigned AS = V->getType()->getPointerAddressSpace(); |
| 2775 | unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); |
| 2776 | APInt Offset = APInt::getZero(numBits: IntPtrWidth); |
| 2777 | |
| 2778 | // Even though we don't look through PHI nodes, we could be called on an |
| 2779 | // instruction in an unreachable block, which may be on a cycle. |
| 2780 | SmallPtrSet<Value *, 4> Visited; |
| 2781 | Visited.insert(Ptr: V); |
| 2782 | do { |
| 2783 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Val: V)) { |
| 2784 | if (!GEP->isInBounds() || !accumulateGEPOffset(GEP&: *GEP, Offset)) |
| 2785 | return nullptr; |
| 2786 | V = GEP->getPointerOperand(); |
| 2787 | } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: V)) { |
| 2788 | if (GA->isInterposable()) |
| 2789 | break; |
| 2790 | V = GA->getAliasee(); |
| 2791 | } else { |
| 2792 | break; |
| 2793 | } |
| 2794 | assert(V->getType()->isPointerTy() && "Unexpected operand type!" ); |
| 2795 | } while (Visited.insert(Ptr: V).second); |
| 2796 | |
| 2797 | Type *IdxPtrTy = DL.getIndexType(PtrTy: V->getType()); |
| 2798 | return cast<ConstantInt>(Val: ConstantInt::get(Ty: IdxPtrTy, V: Offset)); |
| 2799 | } |
| 2800 | |
| 2801 | /// Find dead blocks due to deleted CFG edges during inlining. |
| 2802 | /// |
| 2803 | /// If we know the successor of the current block, \p CurrBB, has to be \p |
| 2804 | /// NextBB, the other successors of \p CurrBB are dead if these successors have |
| 2805 | /// no live incoming CFG edges. If one block is found to be dead, we can |
| 2806 | /// continue growing the dead block list by checking the successors of the dead |
| 2807 | /// blocks to see if all their incoming edges are dead or not. |
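| | /// For example (illustrative): if CurrBB's terminator is known to branch to |
| | /// NextBB, then a sibling successor B whose only live predecessor was CurrBB |
| | /// is dead, and so is any block reachable only through B. |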
| 2808 | void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { |
| 2809 | auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { |
| 2810 | // A CFG edge is dead if the predecessor is dead or the predecessor has a |
| 2811 | // known successor which is not the one under examination. |
| 2812 | if (DeadBlocks.count(Ptr: Pred)) |
| 2813 | return true; |
| 2814 | BasicBlock *KnownSucc = KnownSuccessors[Pred]; |
| 2815 | return KnownSucc && KnownSucc != Succ; |
| 2816 | }; |
| 2817 | |
| 2818 | auto IsNewlyDead = [&](BasicBlock *BB) { |
| 2819 | // If all the edges to a block are dead, the block is also dead. |
| 2820 | return (!DeadBlocks.count(Ptr: BB) && |
| 2821 | llvm::all_of(Range: predecessors(BB), |
| 2822 | P: [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); |
| 2823 | }; |
| 2824 | |
| 2825 | for (BasicBlock *Succ : successors(BB: CurrBB)) { |
| 2826 | if (Succ == NextBB || !IsNewlyDead(Succ)) |
| 2827 | continue; |
| 2828 | SmallVector<BasicBlock *, 4> NewDead; |
| 2829 | NewDead.push_back(Elt: Succ); |
| 2830 | while (!NewDead.empty()) { |
| 2831 | BasicBlock *Dead = NewDead.pop_back_val(); |
| 2832 | if (DeadBlocks.insert(Ptr: Dead).second) |
| 2833 | // Continue growing the dead block lists. |
| 2834 | for (BasicBlock *S : successors(BB: Dead)) |
| 2835 | if (IsNewlyDead(S)) |
| 2836 | NewDead.push_back(Elt: S); |
| 2837 | } |
| 2838 | } |
| 2839 | } |
| 2840 | |
| 2841 | /// Analyze a call site for potential inlining. |
| 2842 | /// |
| 2843 | /// Returns true if inlining this call is viable, and false if it is not |
| 2844 | /// viable. It computes the cost and adjusts the threshold based on numerous |
| 2845 | /// factors and heuristics. If this method returns false but the computed cost |
| 2846 | /// is below the computed threshold, then inlining was forcibly disabled by |
| 2847 | /// some artifact of the routine. |
| 2848 | InlineResult CallAnalyzer::analyze() { |
| 2849 | ++NumCallsAnalyzed; |
| 2850 | |
| 2851 | auto Result = onAnalysisStart(); |
| 2852 | if (!Result.isSuccess()) |
| 2853 | return Result; |
| 2854 | |
| 2855 | if (F.empty()) |
| 2856 | return InlineResult::success(); |
| 2857 | |
| 2858 | Function *Caller = CandidateCall.getFunction(); |
| 2859 | // Check if the caller function is recursive itself. |
| 2860 | for (User *U : Caller->users()) { |
| 2861 | CallBase *Call = dyn_cast<CallBase>(Val: U); |
| 2862 | if (Call && Call->getFunction() == Caller) { |
| 2863 | IsCallerRecursive = true; |
| 2864 | break; |
| 2865 | } |
| 2866 | } |
| 2867 | |
| 2868 | // Populate our simplified values by mapping from function arguments to call |
| 2869 | // arguments with known important simplifications. |
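| | // For example (illustrative): if the call site passes the constant 42 for a |
| | // parameter %n, uses of %n in the callee are costed as if they were 42. |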
| 2870 | auto CAI = CandidateCall.arg_begin(); |
| 2871 | for (Argument &FAI : F.args()) { |
| 2872 | assert(CAI != CandidateCall.arg_end()); |
| 2873 | SimplifiedValues[&FAI] = *CAI; |
| 2874 | if (isa<Constant>(Val: *CAI)) |
| 2875 | ++NumConstantArgs; |
| 2876 | |
| 2877 | Value *PtrArg = *CAI; |
| 2878 | if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(V&: PtrArg)) { |
| 2879 | ConstantOffsetPtrs[&FAI] = std::make_pair(x&: PtrArg, y: C->getValue()); |
| 2880 | |
| 2881 | // We can SROA any pointer arguments derived from alloca instructions. |
| 2882 | if (auto *SROAArg = dyn_cast<AllocaInst>(Val: PtrArg)) { |
| 2883 | SROAArgValues[&FAI] = SROAArg; |
| 2884 | onInitializeSROAArg(Arg: SROAArg); |
| 2885 | EnabledSROAAllocas.insert(V: SROAArg); |
| 2886 | } |
| 2887 | } |
| 2888 | ++CAI; |
| 2889 | } |
| 2890 | NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); |
| 2891 | NumAllocaArgs = SROAArgValues.size(); |
| 2892 | |
| 2893 | // Collecting the ephemeral values of `F` can be expensive, so use the |
| 2894 | // ephemeral values cache if available. |
| 2895 | SmallPtrSet<const Value *, 32> EphValuesStorage; |
| 2896 | const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage; |
| 2897 | if (GetEphValuesCache) |
| 2898 | EphValues = &GetEphValuesCache(F).ephValues(); |
| 2899 | else |
| 2900 | CodeMetrics::collectEphemeralValues(L: &F, AC: &GetAssumptionCache(F), |
| 2901 | EphValues&: EphValuesStorage); |
| 2902 | |
| 2903 | // The worklist of live basic blocks in the callee *after* inlining. We avoid |
| 2904 | // adding basic blocks of the callee which can be proven to be dead for this |
| 2905 | // particular call site in order to get more accurate cost estimates. This |
| 2906 | // requires a somewhat heavyweight iteration pattern: we need to walk the |
| 2907 | // basic blocks in a breadth-first order as we insert live successors. To |
| 2908 | // accomplish this, we use a small-size optimized SetVector; because we exit |
| 2909 | // once we cross our threshold, we expect only a small number of iterations. |
| 2910 | typedef SmallSetVector<BasicBlock *, 16> BBSetVector; |
| 2911 | BBSetVector BBWorklist; |
| 2912 | BBWorklist.insert(X: &F.getEntryBlock()); |
| 2913 | |
| 2914 | // Note that we *must not* cache the size; this loop grows the worklist. |
| 2915 | for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { |
| 2916 | if (shouldStop()) |
| 2917 | break; |
| 2918 | |
| 2919 | BasicBlock *BB = BBWorklist[Idx]; |
| 2920 | if (BB->empty()) |
| 2921 | continue; |
| 2922 | |
| 2923 | onBlockStart(BB); |
| 2924 | |
| 2925 | // Disallow inlining a blockaddress with uses other than strictly callbr. |
| 2926 | // A blockaddress only has defined behavior for an indirect branch in the |
| 2927 | // same function, and we do not currently support inlining indirect |
| 2928 | // branches. But, the inliner may not see an indirect branch that ends up |
| 2929 | // being dead code at a particular call site. If the blockaddress escapes |
| 2930 | // the function, e.g., via a global variable, inlining may lead to an |
| 2931 | // invalid cross-function reference. |
| 2932 | // FIXME: pr/39560: continue relaxing this overt restriction. |
| 2933 | if (BB->hasAddressTaken()) |
| 2934 | for (User *U : BlockAddress::get(BB: &*BB)->users()) |
| 2935 | if (!isa<CallBrInst>(Val: *U)) |
| 2936 | return InlineResult::failure(Reason: "blockaddress used outside of callbr" ); |
| 2937 | |
| 2938 | // Analyze the cost of this block. If we blow through the threshold, this |
| 2939 | // returns a failure result, and we can bail out. |
| 2940 | InlineResult IR = analyzeBlock(BB, EphValues: *EphValues); |
| 2941 | if (!IR.isSuccess()) |
| 2942 | return IR; |
| 2943 | |
| 2944 | Instruction *TI = BB->getTerminator(); |
| 2945 | |
| 2946 | // Add in the live successors by first checking whether we have a terminator |
| 2947 | // that may be simplified based on the values simplified by this call. |
| 2948 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
| 2949 | if (BI->isConditional()) { |
| 2950 | Value *Cond = BI->getCondition(); |
| 2951 | if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(V: Cond)) { |
| 2952 | BasicBlock *NextBB = BI->getSuccessor(i: SimpleCond->isZero() ? 1 : 0); |
| 2953 | BBWorklist.insert(X: NextBB); |
| 2954 | KnownSuccessors[BB] = NextBB; |
| 2955 | findDeadBlocks(CurrBB: BB, NextBB); |
| 2956 | continue; |
| 2957 | } |
| 2958 | } |
| 2959 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 2960 | Value *Cond = SI->getCondition(); |
| 2961 | if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(V: Cond)) { |
| 2962 | BasicBlock *NextBB = SI->findCaseValue(C: SimpleCond)->getCaseSuccessor(); |
| 2963 | BBWorklist.insert(X: NextBB); |
| 2964 | KnownSuccessors[BB] = NextBB; |
| 2965 | findDeadBlocks(CurrBB: BB, NextBB); |
| 2966 | continue; |
| 2967 | } |
| 2968 | } |
| 2969 | |
| 2970 | // If we're unable to select a particular successor, just count all of |
| 2971 | // them. |
| 2972 | BBWorklist.insert_range(R: successors(BB)); |
| 2973 | |
| 2974 | onBlockAnalyzed(BB); |
| 2975 | } |
| 2976 | |
| 2977 | // If this is a noduplicate call, we can still inline as long as |
| 2978 | // inlining this would cause the removal of the callee (so the instruction |
| 2979 | // is not actually duplicated, just moved). |
| 2980 | if (!isSoleCallToLocalFunction(CB: CandidateCall, Callee: F) && ContainsNoDuplicateCall) |
| 2981 | return InlineResult::failure(Reason: "noduplicate" ); |
| 2982 | |
| 2983 | // If the callee's stack size exceeds the user-specified threshold, |
| 2984 | // do not let it be inlined. |
| 2985 | // The command line option overrides a limit set in the function attributes. |
| 2986 | size_t FinalStackSizeThreshold = StackSizeThreshold; |
| 2987 | if (!StackSizeThreshold.getNumOccurrences()) |
| 2988 | if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt( |
| 2989 | F: Caller, AttrKind: InlineConstants::MaxInlineStackSizeAttributeName)) |
| 2990 | FinalStackSizeThreshold = *AttrMaxStackSize; |
| 2991 | if (AllocatedSize > FinalStackSizeThreshold) |
| 2992 | return InlineResult::failure(Reason: "stacksize" ); |
| 2993 | |
| 2994 | return finalizeAnalysis(); |
| 2995 | } |
| 2996 | |
| 2997 | void InlineCostCallAnalyzer::print(raw_ostream &OS) { |
| 2998 | #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n" |
| 2999 | if (PrintInstructionComments) |
| 3000 | F.print(OS, AAW: &Writer); |
| 3001 | DEBUG_PRINT_STAT(NumConstantArgs); |
| 3002 | DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); |
| 3003 | DEBUG_PRINT_STAT(NumAllocaArgs); |
| 3004 | DEBUG_PRINT_STAT(NumConstantPtrCmps); |
| 3005 | DEBUG_PRINT_STAT(NumConstantPtrDiffs); |
| 3006 | DEBUG_PRINT_STAT(NumInstructionsSimplified); |
| 3007 | DEBUG_PRINT_STAT(NumInstructions); |
| 3008 | DEBUG_PRINT_STAT(SROACostSavings); |
| 3009 | DEBUG_PRINT_STAT(SROACostSavingsLost); |
| 3010 | DEBUG_PRINT_STAT(LoadEliminationCost); |
| 3011 | DEBUG_PRINT_STAT(ContainsNoDuplicateCall); |
| 3012 | DEBUG_PRINT_STAT(Cost); |
| 3013 | DEBUG_PRINT_STAT(Threshold); |
| 3014 | #undef DEBUG_PRINT_STAT |
| 3015 | } |
| 3016 | |
| 3017 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 3018 | /// Dump stats about this call's analysis. |
| 3019 | LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } |
| 3020 | #endif |
| 3021 | |
| 3022 | /// Test that there are no attribute conflicts between Caller and Callee |
| 3023 | /// that prevent inlining. |
| 3024 | static bool functionsHaveCompatibleAttributes( |
| 3025 | Function *Caller, Function *Callee, TargetTransformInfo &TTI, |
| 3026 | function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) { |
| 3027 | // Note that CalleeTLI must be a copy not a reference. The legacy pass manager |
| 3028 | // caches the most recently created TLI in the TargetLibraryInfoWrapperPass |
| 3029 | // object, and always returns the same object (which is overwritten on each |
| 3030 | // GetTLI call). Therefore we copy the first result. |
| 3031 | auto CalleeTLI = GetTLI(*Callee); |
| 3032 | return (IgnoreTTIInlineCompatible || |
| 3033 | TTI.areInlineCompatible(Caller, Callee)) && |
| 3034 | GetTLI(*Caller).areInlineCompatible(CalleeTLI, |
| 3035 | AllowCallerSuperset: InlineCallerSupersetNoBuiltin) && |
| 3036 | AttributeFuncs::areInlineCompatible(Caller: *Caller, Callee: *Callee); |
| 3037 | } |
| 3038 | |
| 3039 | int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, |
| 3040 | const DataLayout &DL) { |
| 3041 | int64_t Cost = 0; |
| 3042 | for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { |
| 3043 | if (Call.isByValArgument(ArgNo: I)) { |
| 3044 | // We approximate the number of loads and stores needed by dividing the |
| 3045 | // size of the byval type by the target's pointer size. |
| 3046 | PointerType *PTy = cast<PointerType>(Val: Call.getArgOperand(i: I)->getType()); |
| 3047 | unsigned TypeSize = DL.getTypeSizeInBits(Ty: Call.getParamByValType(ArgNo: I)); |
| 3048 | unsigned AS = PTy->getAddressSpace(); |
| 3049 | unsigned PointerSize = DL.getPointerSizeInBits(AS); |
| 3050 | // Ceiling division. |
| 3051 | unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; |
| 3052 | |
| 3053 | // If it generates more than 8 stores it is likely to be expanded as an |
| 3054 | // inline memcpy so we take that as an upper bound. Otherwise we assume |
| 3055 | // one load and one store per word copied. |
| 3056 | // FIXME: The maxStoresPerMemcpy setting from the target should be used |
| 3057 | // here instead of a magic number of 8, but it's not available via |
| 3058 | // DataLayout. |
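| | // For example (illustrative): a 256-bit byval type with 64-bit pointers |
| | // gives NumStores = (256 + 63) / 64 = 4, below the cap of 8, so the cost |
| | // below adds 2 * 4 * InstrCost (four loads plus four stores). |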
| 3059 | NumStores = std::min(a: NumStores, b: 8U); |
| 3060 | |
| 3061 | Cost += 2 * NumStores * InstrCost; |
| 3062 | } else { |
| 3063 | // For non-byval arguments subtract off one instruction per call |
| 3064 | // argument. |
| 3065 | Cost += InstrCost; |
| 3066 | } |
| 3067 | } |
| 3068 | // The call instruction also disappears after inlining. |
| 3069 | Cost += InstrCost; |
| 3070 | Cost += TTI.getInlineCallPenalty(F: Call.getCaller(), Call, DefaultCallPenalty: CallPenalty); |
| 3071 | |
| 3072 | return std::min<int64_t>(a: Cost, INT_MAX); |
| 3073 | } |
| 3074 | |
| 3075 | InlineCost llvm::getInlineCost( |
| 3076 | CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, |
| 3077 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3078 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3079 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3080 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, |
| 3081 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) { |
| 3082 | return getInlineCost(Call, Callee: Call.getCalledFunction(), Params, CalleeTTI, |
| 3083 | GetAssumptionCache, GetTLI, GetBFI, PSI, ORE, |
| 3084 | GetEphValuesCache); |
| 3085 | } |
| 3086 | |
| 3087 | std::optional<int> llvm::getInliningCostEstimate( |
| 3088 | CallBase &Call, TargetTransformInfo &CalleeTTI, |
| 3089 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3090 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3091 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3092 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { |
| 3093 | const InlineParams Params = {/* DefaultThreshold*/ 0, |
| 3094 | /*HintThreshold*/ {}, |
| 3095 | /*ColdThreshold*/ {}, |
| 3096 | /*OptSizeThreshold*/ {}, |
| 3097 | /*OptMinSizeThreshold*/ {}, |
| 3098 | /*HotCallSiteThreshold*/ {}, |
| 3099 | /*LocallyHotCallSiteThreshold*/ {}, |
| 3100 | /*ColdCallSiteThreshold*/ {}, |
| 3101 | /*ComputeFullInlineCost*/ true, |
| 3102 | /*EnableDeferral*/ true}; |
| 3103 | |
| 3104 | InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, |
| 3105 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true, |
| 3106 | /*IgnoreThreshold*/ true); |
| 3107 | auto R = CA.analyze(); |
| 3108 | if (!R.isSuccess()) |
| 3109 | return std::nullopt; |
| 3110 | return CA.getCost(); |
| 3111 | } |
| 3112 | |
| 3113 | std::optional<InlineCostFeatures> llvm::getInliningCostFeatures( |
| 3114 | CallBase &Call, TargetTransformInfo &CalleeTTI, |
| 3115 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3116 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3117 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3118 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { |
| 3119 | InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI, |
| 3120 | PSI, ORE, *Call.getCalledFunction(), Call); |
| 3121 | auto R = CFA.analyze(); |
| 3122 | if (!R.isSuccess()) |
| 3123 | return std::nullopt; |
| 3124 | return CFA.features(); |
| 3125 | } |
| 3126 | |
| 3127 | std::optional<InlineResult> llvm::getAttributeBasedInliningDecision( |
| 3128 | CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, |
| 3129 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { |
| 3130 | |
| 3131 | // Cannot inline indirect calls. |
| 3132 | if (!Callee) |
| 3133 | return InlineResult::failure(Reason: "indirect call" ); |
| 3134 | |
| 3135 | // When a callee coroutine function is inlined into a caller coroutine |
| 3136 | // function before the coro-split pass, the coro-early pass cannot handle |
| 3137 | // this quite well. So we won't inline a coroutine function that has not |
| 3138 | // been split yet. |
| 3139 | if (Callee->isPresplitCoroutine()) |
| 3140 | return InlineResult::failure(Reason: "unsplited coroutine call" ); |
| 3141 | |
| 3142 | // Never inline calls with byval arguments that do not have the alloca |
| 3143 | // address space. Since byval arguments can be replaced with a copy to an |
| 3144 | // alloca, the inlined code would need to be adjusted to handle the argument |
| 3145 | // being in the alloca address space, which is a little bit complicated to |
| 3146 | // solve. |
| 3147 | unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace(); |
| 3148 | for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) |
| 3149 | if (Call.isByValArgument(ArgNo: I)) { |
| 3150 | PointerType *PTy = cast<PointerType>(Val: Call.getArgOperand(i: I)->getType()); |
| 3151 | if (PTy->getAddressSpace() != AllocaAS) |
| 3152 | return InlineResult::failure(Reason: "byval arguments without alloca" |
| 3153 | " address space" ); |
| 3154 | } |
| 3155 | |
| 3156 | // Calls to functions with always-inline attributes should be inlined |
| 3157 | // whenever possible. |
| 3158 | if (Call.hasFnAttr(Kind: Attribute::AlwaysInline)) { |
| 3159 | if (Call.getAttributes().hasFnAttr(Kind: Attribute::NoInline)) |
| 3160 | return InlineResult::failure(Reason: "noinline call site attribute" ); |
| 3161 | |
| 3162 | auto IsViable = isInlineViable(Callee&: *Callee); |
| 3163 | if (IsViable.isSuccess()) |
| 3164 | return InlineResult::success(); |
| 3165 | return InlineResult::failure(Reason: IsViable.getFailureReason()); |
| 3166 | } |
| 3167 | |
| 3168 | // Never inline functions with conflicting attributes (unless callee has |
| 3169 | // always-inline attribute). |
| 3170 | Function *Caller = Call.getCaller(); |
| 3171 | if (!functionsHaveCompatibleAttributes(Caller, Callee, TTI&: CalleeTTI, GetTLI)) |
| 3172 | return InlineResult::failure(Reason: "conflicting attributes" ); |
| 3173 | |
| 3174 | // Don't inline this call if the caller has the optnone attribute. |
| 3175 | if (Caller->hasOptNone()) |
| 3176 | return InlineResult::failure(Reason: "optnone attribute" ); |
| 3177 | |
| 3178 | // Don't inline a function that treats null pointer as valid into a caller |
| 3179 | // that does not have this attribute. |
| 3180 | if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) |
| 3181 | return InlineResult::failure(Reason: "nullptr definitions incompatible" ); |
| 3182 | |
| 3183 | // Don't inline functions which can be interposed at link-time. |
| 3184 | if (Callee->isInterposable()) |
| 3185 | return InlineResult::failure(Reason: "interposable" ); |
| 3186 | |
| 3187 | // Don't inline functions marked noinline. |
| 3188 | if (Callee->hasFnAttribute(Kind: Attribute::NoInline)) |
| 3189 | return InlineResult::failure(Reason: "noinline function attribute" ); |
| 3190 | |
| 3191 | // Don't inline call sites marked noinline. |
| 3192 | if (Call.isNoInline()) |
| 3193 | return InlineResult::failure(Reason: "noinline call site attribute" ); |
| 3194 | |
| 3195 | // Don't inline functions that are loader replaceable. |
| 3196 | if (Callee->hasFnAttribute(Kind: "loader-replaceable" )) |
| 3197 | return InlineResult::failure(Reason: "loader replaceable function attribute" ); |
| 3198 | |
| 3199 | return std::nullopt; |
| 3200 | } |
| 3201 | |
| 3202 | InlineCost llvm::getInlineCost( |
| 3203 | CallBase &Call, Function *Callee, const InlineParams &Params, |
| 3204 | TargetTransformInfo &CalleeTTI, |
| 3205 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3206 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3207 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3208 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, |
| 3209 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) { |
| 3210 | |
| 3211 | auto UserDecision = |
| 3212 | llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); |
| 3213 | |
| 3214 | if (UserDecision) { |
| 3215 | if (UserDecision->isSuccess()) |
| 3216 | return llvm::InlineCost::getAlways(Reason: "always inline attribute" ); |
| 3217 | return llvm::InlineCost::getNever(Reason: UserDecision->getFailureReason()); |
| 3218 | } |
| 3219 | |
| 3220 | LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() |
| 3221 | << "... (caller:" << Call.getCaller()->getName() |
| 3222 | << ")\n" ); |
| 3223 | |
| 3224 | InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, |
| 3225 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 3226 | /*BoostIndirect=*/true, /*IgnoreThreshold=*/false, |
| 3227 | GetEphValuesCache); |
| 3228 | InlineResult ShouldInline = CA.analyze(); |
| 3229 | |
| 3230 | LLVM_DEBUG(CA.dump()); |
| 3231 | |
| 3232 | // Always make the cost-benefit based decision explicit. |
| 3233 | // We use always/never here since the threshold is not meaningful, |
| 3234 | // as it's not what drives the cost-benefit analysis. |
| 3235 | if (CA.wasDecidedByCostBenefit()) { |
| 3236 | if (ShouldInline.isSuccess()) |
| 3237 | return InlineCost::getAlways(Reason: "benefit over cost" , |
| 3238 | CostBenefit: CA.getCostBenefitPair()); |
| 3239 | else |
| 3240 | return InlineCost::getNever(Reason: "cost over benefit" , CostBenefit: CA.getCostBenefitPair()); |
| 3241 | } |
| 3242 | |
| 3243 | if (CA.wasDecidedByCostThreshold()) |
| 3244 | return InlineCost::get(Cost: CA.getCost(), Threshold: CA.getThreshold(), |
| 3245 | StaticBonus: CA.getStaticBonusApplied()); |
| 3246 | |
| 3247 | // No details on how the decision was made, simply return always or never. |
| 3248 | return ShouldInline.isSuccess() |
| 3249 | ? InlineCost::getAlways(Reason: "empty function" ) |
| 3250 | : InlineCost::getNever(Reason: ShouldInline.getFailureReason()); |
| 3251 | } |
| 3252 | |
| 3253 | InlineResult llvm::isInlineViable(Function &F) { |
| 3254 | bool ReturnsTwice = F.hasFnAttribute(Kind: Attribute::ReturnsTwice); |
| 3255 | for (BasicBlock &BB : F) { |
| 3256 | // Disallow inlining of functions which contain indirect branches. |
| 3257 | if (isa<IndirectBrInst>(Val: BB.getTerminator())) |
| 3258 | return InlineResult::failure(Reason: "contains indirect branches" ); |
| 3259 | |
| 3260 | // Disallow inlining of blockaddresses which are used by non-callbr |
| 3261 | // instructions. |
| 3262 | if (BB.hasAddressTaken()) |
| 3263 | for (User *U : BlockAddress::get(BB: &BB)->users()) |
| 3264 | if (!isa<CallBrInst>(Val: *U)) |
| 3265 | return InlineResult::failure(Reason: "blockaddress used outside of callbr" ); |
| 3266 | |
| 3267 | for (auto &II : BB) { |
| 3268 | CallBase *Call = dyn_cast<CallBase>(Val: &II); |
| 3269 | if (!Call) |
| 3270 | continue; |
| 3271 | |
| 3272 | // Disallow recursive calls. |
| 3273 | Function *Callee = Call->getCalledFunction(); |
| 3274 | if (&F == Callee) |
| 3275 | return InlineResult::failure(Reason: "recursive call" ); |
| 3276 | |
| 3277 | // Disallow calls which expose returns-twice to a function not previously |
| 3278 | // attributed as such. |
| 3279 | if (!ReturnsTwice && isa<CallInst>(Val: Call) && |
| 3280 | cast<CallInst>(Val: Call)->canReturnTwice()) |
| 3281 | return InlineResult::failure(Reason: "exposes returns-twice attribute" ); |
| 3282 | |
| 3283 | if (Callee) |
| 3284 | switch (Callee->getIntrinsicID()) { |
| 3285 | default: |
| 3286 | break; |
| 3287 | case llvm::Intrinsic::icall_branch_funnel: |
| 3288 | // Disallow inlining of @llvm.icall.branch.funnel because current |
| 3289 | // backend can't separate call targets from call arguments. |
| 3290 | return InlineResult::failure( |
| 3291 | Reason: "disallowed inlining of @llvm.icall.branch.funnel" ); |
| 3292 | case llvm::Intrinsic::localescape: |
| 3293 | // Disallow inlining functions that call @llvm.localescape. Doing this |
| 3294 | // correctly would require major changes to the inliner. |
| 3295 | return InlineResult::failure( |
| 3296 | Reason: "disallowed inlining of @llvm.localescape" ); |
| 3297 | case llvm::Intrinsic::vastart: |
| 3298 | // Disallow inlining of functions that initialize VarArgs with |
| 3299 | // va_start. |
| 3300 | return InlineResult::failure( |
| 3301 | Reason: "contains VarArgs initialized with va_start" ); |
| 3302 | } |
| 3303 | } |
| 3304 | } |
| 3305 | |
| 3306 | return InlineResult::success(); |
| 3307 | } |
| 3308 | |
| 3309 | // APIs to create InlineParams based on command line flags and/or other |
| 3310 | // parameters. |
| 3311 | |
| 3312 | InlineParams llvm::getInlineParams(int Threshold) { |
| 3313 | InlineParams Params; |
| 3314 | |
| 3315 | // This field is the threshold to use for a callee by default. This is |
| 3316 | // derived from one or more of: |
| 3317 | // * optimization or size-optimization levels, |
| 3318 | // * a value passed to createFunctionInliningPass function, or |
| 3319 | // * the -inline-threshold flag. |
| 3320 | // If the -inline-threshold flag is explicitly specified, that is used |
| 3321 | // irrespective of anything else. |
| 3322 | if (InlineThreshold.getNumOccurrences() > 0) |
| 3323 | Params.DefaultThreshold = InlineThreshold; |
| 3324 | else |
| 3325 | Params.DefaultThreshold = Threshold; |
| 3326 | |
| 3327 | // Set the HintThreshold knob from the -inlinehint-threshold. |
| 3328 | Params.HintThreshold = HintThreshold; |
| 3329 | |
| 3330 | // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. |
| 3331 | Params.HotCallSiteThreshold = HotCallSiteThreshold; |
| 3332 | |
| 3333 | // If the -locally-hot-callsite-threshold is explicitly specified, use it to |
| 3334 | // populate LocallyHotCallSiteThreshold. Later, we populate |
| 3335 | // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if |
| 3336 | // we know that optimization level is O3 (in the getInlineParams variant that |
| 3337 | // takes the opt and size levels). |
| 3338 | // FIXME: Remove this check (and make the assignment unconditional) after |
| 3339 | // addressing size regression issues at O2. |
| 3340 | if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) |
| 3341 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; |
| 3342 | |
| 3343 | // Set the ColdCallSiteThreshold knob from the |
| 3344 | // -inline-cold-callsite-threshold. |
| 3345 | Params.ColdCallSiteThreshold = ColdCallSiteThreshold; |
| 3346 | |
| 3347 | // Set the OptMinSizeThreshold and OptSizeThreshold params only if the |
| 3348 | // -inline-threshold command-line option is not explicitly given. If that |
| 3349 | // option is present, then its value applies even for callees with size and |
| 3350 | // minsize attributes. |
| 3351 | // If the -inline-threshold is not specified, set the ColdThreshold from the |
| 3352 | // -inlinecold-threshold even if it is not explicitly passed. If |
| 3353 | // -inline-threshold is specified, then -inlinecold-threshold needs to be |
| 3354 | // explicitly specified to set the ColdThreshold knob. |
| 3355 | if (InlineThreshold.getNumOccurrences() == 0) { |
| 3356 | Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; |
| 3357 | Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; |
| 3358 | Params.ColdThreshold = ColdThreshold; |
| 3359 | } else if (ColdThreshold.getNumOccurrences() > 0) { |
| 3360 | Params.ColdThreshold = ColdThreshold; |
| 3361 | } |
| 3362 | return Params; |
| 3363 | } |
| 3364 | |
| 3365 | InlineParams llvm::getInlineParams() { |
| 3366 | return getInlineParams(Threshold: DefaultThreshold); |
| 3367 | } |
| 3368 | |
| 3369 | // Compute the default threshold for inlining based on the opt level and the |
| 3370 | // size opt level. |
| 3371 | static int computeThresholdFromOptLevels(unsigned OptLevel, |
| 3372 | unsigned SizeOptLevel) { |
| 3373 | if (OptLevel > 2) |
| 3374 | return InlineConstants::OptAggressiveThreshold; |
| 3375 | if (SizeOptLevel == 1) // -Os |
| 3376 | return InlineConstants::OptSizeThreshold; |
| 3377 | if (SizeOptLevel == 2) // -Oz |
| 3378 | return InlineConstants::OptMinSizeThreshold; |
| 3379 | return DefaultThreshold; |
| 3380 | } |
| 3381 | |
| 3382 | InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { |
| 3383 | auto Params = |
| 3384 | getInlineParams(Threshold: computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); |
| 3385 | // At O3, use the value of -locally-hot-callsite-threshold option to populate |
| 3386 | // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only |
| 3387 | // when it is specified explicitly. |
| 3388 | if (OptLevel > 2) |
| 3389 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; |
| 3390 | return Params; |
| 3391 | } |
| 3392 | |
| 3393 | PreservedAnalyses |
| 3394 | InlineCostAnnotationPrinterPass::run(Function &F, |
| 3395 | FunctionAnalysisManager &FAM) { |
| 3396 | PrintInstructionComments = true; |
| 3397 | std::function<AssumptionCache &(Function &)> GetAssumptionCache = |
| 3398 | [&](Function &F) -> AssumptionCache & { |
| 3399 | return FAM.getResult<AssumptionAnalysis>(IR&: F); |
| 3400 | }; |
| 3401 | |
| 3402 | auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F); |
| 3403 | ProfileSummaryInfo *PSI = |
| 3404 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent()); |
| 3405 | const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F); |
| 3406 | |
| 3407 | // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. |
| 3408 | // In the current implementation, the type of InlineParams doesn't matter as |
| 3409 | // the pass serves only for verification of the inliner's decisions. |
| 3410 | // We can add a flag which determines InlineParams for this run. Right now, |
| 3411 | // the default InlineParams are used. |
| 3412 | const InlineParams Params = llvm::getInlineParams(); |
| 3413 | for (BasicBlock &BB : F) { |
| 3414 | for (Instruction &I : BB) { |
| 3415 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) { |
| 3416 | Function *CalledFunction = CB->getCalledFunction(); |
| 3417 | if (!CalledFunction || CalledFunction->isDeclaration()) |
| 3418 | continue; |
| 3419 | OptimizationRemarkEmitter ORE(CalledFunction); |
| 3420 | InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI, |
| 3421 | GetAssumptionCache, nullptr, nullptr, PSI, |
| 3422 | &ORE); |
| 3423 | ICCA.analyze(); |
| 3424 | OS << " Analyzing call of " << CalledFunction->getName() |
| 3425 | << "... (caller:" << CB->getCaller()->getName() << ")\n" ; |
| 3426 | ICCA.print(OS); |
| 3427 | OS << "\n" ; |
| 3428 | } |
| 3429 | } |
| 3430 | } |
| 3431 | return PreservedAnalyses::all(); |
| 3432 | } |
| 3433 | |