| 1 | //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements inline cost analysis. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Analysis/InlineCost.h" |
| 14 | #include "llvm/ADT/STLExtras.h" |
| 15 | #include "llvm/ADT/SetVector.h" |
| 16 | #include "llvm/ADT/SmallPtrSet.h" |
| 17 | #include "llvm/ADT/SmallVector.h" |
| 18 | #include "llvm/ADT/Statistic.h" |
| 19 | #include "llvm/Analysis/AssumptionCache.h" |
| 20 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 21 | #include "llvm/Analysis/CodeMetrics.h" |
| 22 | #include "llvm/Analysis/ConstantFolding.h" |
| 23 | #include "llvm/Analysis/DomConditionCache.h" |
| 24 | #include "llvm/Analysis/EphemeralValuesCache.h" |
| 25 | #include "llvm/Analysis/InstructionSimplify.h" |
| 26 | #include "llvm/Analysis/LoopInfo.h" |
| 27 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 28 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 30 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 31 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 32 | #include "llvm/Analysis/ValueTracking.h" |
| 33 | #include "llvm/Config/llvm-config.h" |
| 34 | #include "llvm/IR/AssemblyAnnotationWriter.h" |
| 35 | #include "llvm/IR/CallingConv.h" |
| 36 | #include "llvm/IR/DataLayout.h" |
| 37 | #include "llvm/IR/Dominators.h" |
| 38 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
| 39 | #include "llvm/IR/GlobalAlias.h" |
| 40 | #include "llvm/IR/InlineAsm.h" |
| 41 | #include "llvm/IR/InstVisitor.h" |
| 42 | #include "llvm/IR/IntrinsicInst.h" |
| 43 | #include "llvm/IR/Operator.h" |
| 44 | #include "llvm/IR/PatternMatch.h" |
| 45 | #include "llvm/Support/CommandLine.h" |
| 46 | #include "llvm/Support/Debug.h" |
| 47 | #include "llvm/Support/FormattedStream.h" |
| 48 | #include "llvm/Support/raw_ostream.h" |
| 49 | #include <climits> |
| 50 | #include <limits> |
| 51 | #include <optional> |
| 52 | |
| 53 | using namespace llvm; |
| 54 | |
| 55 | #define DEBUG_TYPE "inline-cost" |
| 56 | |
| 57 | STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed" ); |
| 58 | |
| 59 | static cl::opt<int> |
| 60 | DefaultThreshold("inlinedefault-threshold" , cl::Hidden, cl::init(Val: 225), |
| 61 | cl::desc("Default amount of inlining to perform" )); |
| 62 | |
| 63 | // We introduce this option since there is a minor compile-time win by avoiding |
| 64 | // addition of TTI attributes (target-features in particular) to inline |
| 65 | // candidates when they are guaranteed to be the same as top level methods in |
| 66 | // some use cases. If we avoid adding the attribute, we need an option to avoid |
| 67 | // checking these attributes. |
| 68 | static cl::opt<bool> IgnoreTTIInlineCompatible( |
| 69 | "ignore-tti-inline-compatible" , cl::Hidden, cl::init(Val: false), |
| 70 | cl::desc("Ignore TTI attributes compatibility check between callee/caller " |
| 71 | "during inline cost calculation" )); |
| 72 | |
| 73 | static cl::opt<bool> PrintInstructionComments( |
| 74 | "print-instruction-comments" , cl::Hidden, cl::init(Val: false), |
| 75 | cl::desc("Prints comments for instruction based on inline cost analysis" )); |
| 76 | |
| 77 | static cl::opt<int> InlineThreshold( |
| 78 | "inline-threshold" , cl::Hidden, cl::init(Val: 225), |
| 79 | cl::desc("Control the amount of inlining to perform (default = 225)" )); |
| 80 | |
| 81 | static cl::opt<int> HintThreshold( |
| 82 | "inlinehint-threshold" , cl::Hidden, cl::init(Val: 325), |
| 83 | cl::desc("Threshold for inlining functions with inline hint" )); |
| 84 | |
| 85 | static cl::opt<int> |
| 86 | ColdCallSiteThreshold("inline-cold-callsite-threshold" , cl::Hidden, |
| 87 | cl::init(Val: 45), |
| 88 | cl::desc("Threshold for inlining cold callsites" )); |
| 89 | |
| 90 | static cl::opt<bool> InlineEnableCostBenefitAnalysis( |
| 91 | "inline-enable-cost-benefit-analysis" , cl::Hidden, cl::init(Val: false), |
| 92 | cl::desc("Enable the cost-benefit analysis for the inliner" )); |
| 93 | |
| 94 | // InlineSavingsMultiplier overrides per TTI multipliers iff it is |
| 95 | // specified explicitly in command line options. This option is exposed |
| 96 | // for tuning and testing. |
| 97 | static cl::opt<int> InlineSavingsMultiplier( |
| 98 | "inline-savings-multiplier" , cl::Hidden, cl::init(Val: 8), |
| 99 | cl::desc("Multiplier to multiply cycle savings by during inlining" )); |
| 100 | |
| 101 | // InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is |
| 102 | // specified explicitly in command line options. This option is exposed |
| 103 | // for tuning and testing. |
| 104 | static cl::opt<int> InlineSavingsProfitableMultiplier( |
| 105 | "inline-savings-profitable-multiplier" , cl::Hidden, cl::init(Val: 4), |
| 106 | cl::desc("A multiplier on top of cycle savings to decide whether the " |
| 107 | "savings won't justify the cost" )); |
| 108 | |
| 109 | static cl::opt<int> |
| 110 | InlineSizeAllowance("inline-size-allowance" , cl::Hidden, cl::init(Val: 100), |
| 111 | cl::desc("The maximum size of a callee that get's " |
| 112 | "inlined without sufficient cycle savings" )); |
| 113 | |
| 114 | // We introduce this threshold to help performance of instrumentation based |
| 115 | // PGO before we actually hook up inliner with analysis passes such as BPI and |
| 116 | // BFI. |
| 117 | static cl::opt<int> ColdThreshold( |
| 118 | "inlinecold-threshold" , cl::Hidden, cl::init(Val: 45), |
| 119 | cl::desc("Threshold for inlining functions with cold attribute" )); |
| 120 | |
| 121 | static cl::opt<int> |
| 122 | HotCallSiteThreshold("hot-callsite-threshold" , cl::Hidden, cl::init(Val: 3000), |
| 123 | cl::desc("Threshold for hot callsites " )); |
| 124 | |
| 125 | static cl::opt<int> LocallyHotCallSiteThreshold( |
| 126 | "locally-hot-callsite-threshold" , cl::Hidden, cl::init(Val: 525), |
| 127 | cl::desc("Threshold for locally hot callsites " )); |
| 128 | |
| 129 | static cl::opt<int> ColdCallSiteRelFreq( |
| 130 | "cold-callsite-rel-freq" , cl::Hidden, cl::init(Val: 2), |
| 131 | cl::desc("Maximum block frequency, expressed as a percentage of caller's " |
| 132 | "entry frequency, for a callsite to be cold in the absence of " |
| 133 | "profile information." )); |
| 134 | |
| 135 | static cl::opt<uint64_t> HotCallSiteRelFreq( |
| 136 | "hot-callsite-rel-freq" , cl::Hidden, cl::init(Val: 60), |
| 137 | cl::desc("Minimum block frequency, expressed as a multiple of caller's " |
| 138 | "entry frequency, for a callsite to be hot in the absence of " |
| 139 | "profile information." )); |
| 140 | |
| 141 | static cl::opt<int> |
| 142 | InstrCost("inline-instr-cost" , cl::Hidden, cl::init(Val: 5), |
| 143 | cl::desc("Cost of a single instruction when inlining" )); |
| 144 | |
| 145 | static cl::opt<int> InlineAsmInstrCost( |
| 146 | "inline-asm-instr-cost" , cl::Hidden, cl::init(Val: 0), |
| 147 | cl::desc("Cost of a single inline asm instruction when inlining" )); |
| 148 | |
| 149 | static cl::opt<int> |
| 150 | MemAccessCost("inline-memaccess-cost" , cl::Hidden, cl::init(Val: 0), |
| 151 | cl::desc("Cost of load/store instruction when inlining" )); |
| 152 | |
| 153 | static cl::opt<int> CallPenalty( |
| 154 | "inline-call-penalty" , cl::Hidden, cl::init(Val: 25), |
| 155 | cl::desc("Call penalty that is applied per callsite when inlining" )); |
| 156 | |
| 157 | static cl::opt<size_t> |
| 158 | StackSizeThreshold("inline-max-stacksize" , cl::Hidden, |
| 159 | cl::init(Val: std::numeric_limits<size_t>::max()), |
| 160 | cl::desc("Do not inline functions with a stack size " |
| 161 | "that exceeds the specified limit" )); |
| 162 | |
| 163 | static cl::opt<size_t> RecurStackSizeThreshold( |
| 164 | "recursive-inline-max-stacksize" , cl::Hidden, |
| 165 | cl::init(Val: InlineConstants::TotalAllocaSizeRecursiveCaller), |
| 166 | cl::desc("Do not inline recursive functions with a stack " |
| 167 | "size that exceeds the specified limit" )); |
| 168 | |
| 169 | static cl::opt<bool> OptComputeFullInlineCost( |
| 170 | "inline-cost-full" , cl::Hidden, |
| 171 | cl::desc("Compute the full inline cost of a call site even when the cost " |
| 172 | "exceeds the threshold." )); |
| 173 | |
| 174 | static cl::opt<bool> InlineCallerSupersetNoBuiltin( |
| 175 | "inline-caller-superset-nobuiltin" , cl::Hidden, cl::init(Val: true), |
| 176 | cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " |
| 177 | "attributes." )); |
| 178 | |
| 179 | static cl::opt<bool> DisableGEPConstOperand( |
| 180 | "disable-gep-const-evaluation" , cl::Hidden, cl::init(Val: false), |
| 181 | cl::desc("Disables evaluation of GetElementPtr with constant operands" )); |
| 182 | |
| 183 | static cl::opt<bool> InlineAllViableCalls( |
| 184 | "inline-all-viable-calls" , cl::Hidden, cl::init(Val: false), |
| 185 | cl::desc("Inline all viable calls, even if they exceed the inlining " |
| 186 | "threshold" )); |
| 187 | namespace llvm { |
| 188 | std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) { |
| 189 | if (Attr.isValid()) { |
| 190 | int AttrValue = 0; |
| 191 | if (!Attr.getValueAsString().getAsInteger(Radix: 10, Result&: AttrValue)) |
| 192 | return AttrValue; |
| 193 | } |
| 194 | return std::nullopt; |
| 195 | } |
| 196 | |
| 197 | std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { |
| 198 | return getStringFnAttrAsInt(Attr: CB.getFnAttr(Kind: AttrKind)); |
| 199 | } |
| 200 | |
| 201 | std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) { |
| 202 | return getStringFnAttrAsInt(Attr: F->getFnAttribute(Kind: AttrKind)); |
| 203 | } |
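| | // Illustrative example (assumed IR, not taken from this file): these helpers |
| | // parse integer-valued string attributes that tests and tooling can attach |
| | // to a call site or function, e.g. in textual IR: |
| | //   call void @callee() "call-inline-cost"="100" "call-threshold-bonus"="50" |
| | // getStringFnAttrAsInt would then return 100 and 50 for the respective keys, |
| | // and std::nullopt when the attribute is absent or not a base-10 integer. |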
| 204 | |
| 205 | namespace InlineConstants { |
| 206 | int getInstrCost() { return InstrCost; } |
| 207 | |
| 208 | } // namespace InlineConstants |
| 209 | |
| 210 | } // namespace llvm |
| 211 | |
| 212 | namespace { |
| 213 | class InlineCostCallAnalyzer; |
| 214 | |
| 215 | // This struct is used to store information about the inline cost of a |
| 216 | // particular instruction. |
| 217 | struct InstructionCostDetail { |
| 218 | int CostBefore = 0; |
| 219 | int CostAfter = 0; |
| 220 | int ThresholdBefore = 0; |
| 221 | int ThresholdAfter = 0; |
| 222 | |
| 223 | int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; } |
| 224 | |
| 225 | int getCostDelta() const { return CostAfter - CostBefore; } |
| 226 | |
| 227 | bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } |
| 228 | }; |
| 229 | |
| 230 | class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { |
| 231 | private: |
| 232 | InlineCostCallAnalyzer *const ICCA; |
| 233 | |
| 234 | public: |
| 235 | InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} |
| 236 | void emitInstructionAnnot(const Instruction *I, |
| 237 | formatted_raw_ostream &OS) override; |
| 238 | }; |
| 239 | |
| 240 | /// Carry out call site analysis, in order to evaluate inlinability. |
| 241 | /// NOTE: the type is currently used as implementation detail of functions such |
| 242 | /// as llvm::getInlineCost. Note the function_ref constructor parameters - the |
| 243 | /// expectation is that they come from the outer scope, from the wrapper |
| 244 | /// functions. If we want to support constructing CallAnalyzer objects where |
| 245 | /// lambdas are provided inline at construction, or where the object needs to |
| 246 | /// otherwise survive past the scope of the provided functions, we need to |
| 247 | /// revisit the argument types. |
| 248 | class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { |
| 249 | typedef InstVisitor<CallAnalyzer, bool> Base; |
| 250 | friend class InstVisitor<CallAnalyzer, bool>; |
| 251 | |
| 252 | protected: |
| 253 | virtual ~CallAnalyzer() = default; |
| 254 | /// The TargetTransformInfo available for this compilation. |
| 255 | const TargetTransformInfo &TTI; |
| 256 | |
| 257 | /// Getter for the cache of @llvm.assume intrinsics. |
| 258 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache; |
| 259 | |
| 260 | /// Getter for BlockFrequencyInfo |
| 261 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI; |
| 262 | |
| 263 | /// Getter for TargetLibraryInfo |
| 264 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI; |
| 265 | |
| 266 | /// Profile summary information. |
| 267 | ProfileSummaryInfo *PSI; |
| 268 | |
| 269 | /// The called function. |
| 270 | Function &F; |
| 271 | |
| 272 | // Cache the DataLayout since we use it a lot. |
| 273 | const DataLayout &DL; |
| 274 | |
| 275 | /// The OptimizationRemarkEmitter available for this compilation. |
| 276 | OptimizationRemarkEmitter *ORE; |
| 277 | |
| 278 | /// The candidate callsite being analyzed. Please do not use this to do |
| 279 | /// analysis in the caller function; we want the inline cost query to be |
| 280 | /// easily cacheable. Instead, use the cover function paramHasAttr. |
| 281 | CallBase &CandidateCall; |
| 282 | |
| 283 | /// Getter for the cache of ephemeral values. |
| 284 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr; |
| 285 | |
| 286 | /// Extension points for handling callsite features. |
| 287 | /// Called before a basic block was analyzed. |
| 288 | virtual void onBlockStart(const BasicBlock *BB) {} |
| 289 | |
| 290 | /// Called after a basic block was analyzed. |
| 291 | virtual void onBlockAnalyzed(const BasicBlock *BB) {} |
| 292 | |
| 293 | /// Called before an instruction was analyzed |
| 294 | virtual void onInstructionAnalysisStart(const Instruction *I) {} |
| 295 | |
| 296 | /// Called after an instruction was analyzed |
| 297 | virtual void onInstructionAnalysisFinish(const Instruction *I) {} |
| 298 | |
| 299 | /// Called at the end of the analysis of the callsite. Return the outcome of |
| 300 | /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or |
| 301 | /// the reason it can't. |
| 302 | virtual InlineResult finalizeAnalysis() { return InlineResult::success(); } |
| 303 | /// Called when we're about to start processing a basic block, and every time |
| 304 | /// we are done processing an instruction. Return true if there is no point in |
| 305 | /// continuing the analysis (e.g. we've already determined the call site is |
| 306 | /// too expensive to inline). |
| 307 | virtual bool shouldStop() { return false; } |
| 308 | |
| 309 | /// Called before the analysis of the callee body starts (with callsite |
| 310 | /// contexts propagated). It checks callsite-specific information. Return a |
| 311 | /// reason analysis can't continue if that's the case, or 'true' if it may |
| 312 | /// continue. |
| 313 | virtual InlineResult onAnalysisStart() { return InlineResult::success(); } |
| 314 | /// Called if the analysis engine decides SROA cannot be done for the given |
| 315 | /// alloca. |
| 316 | virtual void onDisableSROA(AllocaInst *Arg) {} |
| 317 | |
| 318 | /// Called when the analysis engine determines load elimination won't happen. |
| 319 | virtual void onDisableLoadElimination() {} |
| 320 | |
| 321 | /// Called when we visit a CallBase, before the analysis starts. Return false |
| 322 | /// to stop further processing of the instruction. |
| 323 | virtual bool onCallBaseVisitStart(CallBase &Call) { return true; } |
| 324 | |
| 325 | /// Called to account for a call. |
| 326 | virtual void onCallPenalty() {} |
| 327 | |
| 328 | /// Called to account for a load or store. |
| 329 | virtual void onMemAccess(){}; |
| 330 | |
| 331 | /// Called to account for the expectation the inlining would result in a load |
| 332 | /// elimination. |
| 333 | virtual void onLoadEliminationOpportunity() {} |
| 334 | |
| 335 | /// Called to account for the cost of argument setup for the Call in the |
| 336 | /// callee's body (not the callsite currently under analysis). |
| 337 | virtual void onCallArgumentSetup(const CallBase &Call) {} |
| 338 | |
| 339 | /// Called to account for a load relative intrinsic. |
| 340 | virtual void onLoadRelativeIntrinsic() {} |
| 341 | |
| 342 | /// Called to account for a lowered call. |
| 343 | virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { |
| 344 | } |
| 345 | |
| 346 | /// Account for a jump table of the given size. Return false to stop further |
| 347 | /// processing of the switch instruction. |
| 348 | virtual bool onJumpTable(unsigned JumpTableSize) { return true; } |
| 349 | |
| 350 | /// Account for a case cluster of given size. Return false to stop further |
| 351 | /// processing of the instruction. |
| 352 | virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } |
| 353 | |
| 354 | /// Called at the end of processing a switch instruction, with the given |
| 355 | /// number of case clusters. |
| 356 | virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 357 | bool DefaultDestUnreachable) {} |
| 358 | |
| 359 | /// Called to account for any other instruction not specifically accounted |
| 360 | /// for. |
| 361 | virtual void onMissedSimplification() {} |
| 362 | |
| 363 | /// Account for inline assembly instructions. |
| 364 | virtual void onInlineAsm(const InlineAsm &Arg) {} |
| 365 | |
| 366 | /// Start accounting potential benefits due to SROA for the given alloca. |
| 367 | virtual void onInitializeSROAArg(AllocaInst *Arg) {} |
| 368 | |
| 369 | /// Account SROA savings for the AllocaInst value. |
| 370 | virtual void onAggregateSROAUse(AllocaInst *V) {} |
| 371 | |
| 372 | bool handleSROA(Value *V, bool DoNotDisable) { |
| 373 | // Check for SROA candidates in comparisons. |
| 374 | if (auto *SROAArg = getSROAArgForValueOrNull(V)) { |
| 375 | if (DoNotDisable) { |
| 376 | onAggregateSROAUse(V: SROAArg); |
| 377 | return true; |
| 378 | } |
| 379 | disableSROAForArg(SROAArg); |
| 380 | } |
| 381 | return false; |
| 382 | } |
| 383 | |
| 384 | bool IsCallerRecursive = false; |
| 385 | bool IsRecursiveCall = false; |
| 386 | bool ExposesReturnsTwice = false; |
| 387 | bool HasDynamicAlloca = false; |
| 388 | bool ContainsNoDuplicateCall = false; |
| 389 | bool HasReturn = false; |
| 390 | bool HasIndirectBr = false; |
| 391 | bool HasUninlineableIntrinsic = false; |
| 392 | bool InitsVargArgs = false; |
| 393 | |
| 394 | /// Number of bytes allocated statically by the callee. |
| 395 | uint64_t AllocatedSize = 0; |
| 396 | unsigned NumInstructions = 0; |
| 397 | unsigned NumInlineAsmInstructions = 0; |
| 398 | unsigned NumVectorInstructions = 0; |
| 399 | |
| 400 | /// While we walk the potentially-inlined instructions, we build up and |
| 401 | /// maintain a mapping of simplified values specific to this callsite. The |
| 402 | /// idea is to propagate any special information we have about arguments to |
| 403 | /// this call through the inlinable section of the function, and account for |
| 404 | /// likely simplifications post-inlining. The most important aspect we track |
| 405 | /// is CFG altering simplifications -- when we prove a basic block dead, that |
| 406 | /// can cause dramatic shifts in the cost of inlining a function. |
| 407 | /// Note: The simplified Value may be owned by the caller function. |
| 408 | DenseMap<Value *, Value *> SimplifiedValues; |
| 409 | |
| 410 | /// Keep track of the values which map back (through function arguments) to |
| 411 | /// allocas on the caller stack which could be simplified through SROA. |
| 412 | DenseMap<Value *, AllocaInst *> SROAArgValues; |
| 413 | |
| 414 | /// Keep track of Allocas for which we believe we may get SROA optimization. |
| 415 | DenseSet<AllocaInst *> EnabledSROAAllocas; |
| 416 | |
| 417 | /// Keep track of values which map to a pointer base and constant offset. |
| 418 | DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; |
| 419 | |
| 420 | /// Keep track of dead blocks due to the constant arguments. |
| 421 | SmallPtrSet<BasicBlock *, 16> DeadBlocks; |
| 422 | |
| 423 | /// The mapping of the blocks to their known unique successors due to the |
| 424 | /// constant arguments. |
| 425 | DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors; |
| 426 | |
| 427 | /// Model the elimination of repeated loads that is expected to happen |
| 428 | /// whenever we simplify away the stores that would otherwise cause them to be |
| 429 | /// loads. |
| 430 | bool EnableLoadElimination = true; |
| 431 | |
| 432 | /// Whether we allow inlining of recursive calls. |
| 433 | bool AllowRecursiveCall = false; |
| 434 | |
| 435 | SmallPtrSet<Value *, 16> LoadAddrSet; |
| 436 | |
| 437 | AllocaInst *getSROAArgForValueOrNull(Value *V) const { |
| 438 | auto It = SROAArgValues.find(Val: V); |
| 439 | if (It == SROAArgValues.end() || EnabledSROAAllocas.count(V: It->second) == 0) |
| 440 | return nullptr; |
| 441 | return It->second; |
| 442 | } |
| 443 | |
| 444 | /// Use a value in its given form directly if possible, otherwise try looking |
| 445 | /// for it in SimplifiedValues. |
| 446 | template <typename T> T *getDirectOrSimplifiedValue(Value *V) const { |
| 447 | if (auto *Direct = dyn_cast<T>(V)) |
| 448 | return Direct; |
| 449 | return getSimplifiedValue<T>(V); |
| 450 | } |
| 451 | |
| 452 | // Custom simplification helper routines. |
| 453 | bool isAllocaDerivedArg(Value *V); |
| 454 | void disableSROAForArg(AllocaInst *SROAArg); |
| 455 | void disableSROA(Value *V); |
| 456 | void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); |
| 457 | void disableLoadElimination(); |
| 458 | bool isGEPFree(GetElementPtrInst &GEP); |
| 459 | bool canFoldInboundsGEP(GetElementPtrInst &I); |
| 460 | bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); |
| 461 | bool simplifyCallSite(Function *F, CallBase &Call); |
| 462 | bool simplifyCmpInstForRecCall(CmpInst &Cmp); |
| 463 | bool simplifyInstruction(Instruction &I); |
| 464 | bool simplifyIntrinsicCallIsConstant(CallBase &CB); |
| 465 | bool simplifyIntrinsicCallObjectSize(CallBase &CB); |
| 466 | ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); |
| 467 | bool isLoweredToCall(Function *F, CallBase &Call); |
| 468 | |
| 469 | /// Return true if the given argument to the function being considered for |
| 470 | /// inlining has the given attribute set either at the call site or the |
| 471 | /// function declaration. Primarily used to inspect call site specific |
| 472 | /// attributes since these can be more precise than the ones on the callee |
| 473 | /// itself. |
| 474 | bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); |
| 475 | |
| 476 | /// Return true if the given value is known non null within the callee if |
| 477 | /// inlined through this particular callsite. |
| 478 | bool isKnownNonNullInCallee(Value *V); |
| 479 | |
| 480 | /// Return true if size growth is allowed when inlining the callee at \p Call. |
| 481 | bool allowSizeGrowth(CallBase &Call); |
| 482 | |
| 483 | // Custom analysis routines. |
| 484 | InlineResult analyzeBlock(BasicBlock *BB, |
| 485 | const SmallPtrSetImpl<const Value *> &EphValues); |
| 486 | |
| 487 | // Disable several entry points to the visitor so we don't accidentally use |
| 488 | // them by declaring but not defining them here. |
| 489 | void visit(Module *); |
| 490 | void visit(Module &); |
| 491 | void visit(Function *); |
| 492 | void visit(Function &); |
| 493 | void visit(BasicBlock *); |
| 494 | void visit(BasicBlock &); |
| 495 | |
| 496 | // Provide base case for our instruction visit. |
| 497 | bool visitInstruction(Instruction &I); |
| 498 | |
| 499 | // Our visit overrides. |
| 500 | bool visitAlloca(AllocaInst &I); |
| 501 | bool visitPHI(PHINode &I); |
| 502 | bool visitGetElementPtr(GetElementPtrInst &I); |
| 503 | bool visitBitCast(BitCastInst &I); |
| 504 | bool visitPtrToInt(PtrToIntInst &I); |
| 505 | bool visitIntToPtr(IntToPtrInst &I); |
| 506 | bool visitCastInst(CastInst &I); |
| 507 | bool visitCmpInst(CmpInst &I); |
| 508 | bool visitSub(BinaryOperator &I); |
| 509 | bool visitBinaryOperator(BinaryOperator &I); |
| 510 | bool visitFNeg(UnaryOperator &I); |
| 511 | bool visitLoad(LoadInst &I); |
| 512 | bool visitStore(StoreInst &I); |
| 513 | bool visitExtractValue(ExtractValueInst &I); |
| 514 | bool visitInsertValue(InsertValueInst &I); |
| 515 | bool visitCallBase(CallBase &Call); |
| 516 | bool visitReturnInst(ReturnInst &RI); |
| 517 | bool visitBranchInst(BranchInst &BI); |
| 518 | bool visitSelectInst(SelectInst &SI); |
| 519 | bool visitSwitchInst(SwitchInst &SI); |
| 520 | bool visitIndirectBrInst(IndirectBrInst &IBI); |
| 521 | bool visitResumeInst(ResumeInst &RI); |
| 522 | bool visitCleanupReturnInst(CleanupReturnInst &RI); |
| 523 | bool visitCatchReturnInst(CatchReturnInst &RI); |
| 524 | bool visitUnreachableInst(UnreachableInst &I); |
| 525 | |
| 526 | public: |
| 527 | CallAnalyzer( |
| 528 | Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, |
| 529 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 530 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, |
| 531 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, |
| 532 | ProfileSummaryInfo *PSI = nullptr, |
| 533 | OptimizationRemarkEmitter *ORE = nullptr, |
| 534 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = |
| 535 | nullptr) |
| 536 | : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), |
| 537 | GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE), |
| 538 | CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {} |
| 539 | |
| 540 | InlineResult analyze(); |
| 541 | |
| 542 | /// Lookup simplified Value. May return a value owned by the caller. |
| 543 | Value *getSimplifiedValueUnchecked(Value *V) const { |
| 544 | return SimplifiedValues.lookup(Val: V); |
| 545 | } |
| 546 | |
| 547 | /// Lookup simplified Value, but return nullptr if the simplified value is |
| 548 | /// owned by the caller. |
| 549 | template <typename T> T *getSimplifiedValue(Value *V) const { |
| 550 | Value *SimpleV = SimplifiedValues.lookup(Val: V); |
| 551 | if (!SimpleV) |
| 552 | return nullptr; |
| 553 | |
| 554 | // Skip checks if we know T is a global. This has a small but measurable |
| 555 | // impact on compile time. |
| 556 | if constexpr (std::is_base_of_v<Constant, T>) |
| 557 | return dyn_cast<T>(SimpleV); |
| 558 | |
| 559 | // Make sure the simplified Value is owned by this function |
| 560 | if (auto *I = dyn_cast<Instruction>(Val: SimpleV)) { |
| 561 | if (I->getFunction() != &F) |
| 562 | return nullptr; |
| 563 | } else if (auto *Arg = dyn_cast<Argument>(Val: SimpleV)) { |
| 564 | if (Arg->getParent() != &F) |
| 565 | return nullptr; |
| 566 | } else if (!isa<Constant>(Val: SimpleV)) |
| 567 | return nullptr; |
| 568 | return dyn_cast<T>(SimpleV); |
| 569 | } |
| 570 | |
| 571 | // Keep a bunch of stats about the cost savings found so we can print them |
| 572 | // out when debugging. |
| 573 | unsigned NumConstantArgs = 0; |
| 574 | unsigned NumConstantOffsetPtrArgs = 0; |
| 575 | unsigned NumAllocaArgs = 0; |
| 576 | unsigned NumConstantPtrCmps = 0; |
| 577 | unsigned NumConstantPtrDiffs = 0; |
| 578 | unsigned NumInstructionsSimplified = 0; |
| 579 | |
| 580 | void dump(); |
| 581 | }; |
| 582 | |
| 583 | // When forming a binary search, we should find the number of nodes, which is |
| 584 | // the same as the number of comparisons when lowered. For a given |
| 585 | // number of clusters, n, we can define a recursive function, f(n), to find |
| 586 | // the number of nodes in the tree. The recursion is: |
| 587 | // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3, |
| 588 | // and f(n) = n, when n <= 3. |
| 589 | // This leads to a binary tree where each leaf should be either f(2) or f(3) |
| 590 | // when n > 3. So, the number of comparisons from leaves should be n, while |
| 591 | // the number of comparisons from non-leaf nodes should be: |
| 592 | // 2^(log2(n) - 1) - 1 |
| 593 | // = 2^log2(n) * 2^-1 - 1 |
| 594 | // = n / 2 - 1. |
| 595 | // Considering comparisons from leaf and non-leaf nodes, we can estimate the |
| 596 | // number of comparisons in a simple closed form : |
| 597 | // n + n / 2 - 1 = n * 3 / 2 - 1 |
| 598 | int64_t getExpectedNumberOfCompare(int NumCaseCluster) { |
| 599 | return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1; |
| 600 | } |
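| | // Worked example (illustrative): for NumCaseCluster = 9, the recursion gives |
| | //   f(9) = 1 + f(4) + f(5) = 1 + (1 + f(2) + f(2)) + (1 + f(2) + f(3)) |
| | //        = 1 + 5 + 6 = 12, |
| | // and the closed form gives 3 * 9 / 2 - 1 = 13 - 1 = 12 (integer division), |
| | // so the two agree. |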
| 601 | |
| 602 | /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note |
| 603 | /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer |
| 604 | class InlineCostCallAnalyzer final : public CallAnalyzer { |
| 605 | const bool ComputeFullInlineCost; |
| 606 | int LoadEliminationCost = 0; |
| 607 | /// Bonus to be applied when percentage of vector instructions in callee is |
| 608 | /// high (see more details in updateThreshold). |
| 609 | int VectorBonus = 0; |
| 610 | /// Bonus to be applied when the callee has only one reachable basic block. |
| 611 | int SingleBBBonus = 0; |
| 612 | |
| 613 | /// Tunable parameters that control the analysis. |
| 614 | const InlineParams &Params; |
| 615 | |
| 616 | // This DenseMap stores the delta change in cost and threshold after |
| 617 | // accounting for the given instruction. The map is filled only when the |
| 618 | // PrintInstructionComments flag is on. |
| 619 | DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap; |
| 620 | |
| 621 | /// Upper bound for the inlining cost. Bonuses are being applied to account |
| 622 | /// for speculative "expected profit" of the inlining decision. |
| 623 | int Threshold = 0; |
| 624 | |
| 625 | /// The amount of StaticBonus applied. |
| 626 | int StaticBonusApplied = 0; |
| 627 | |
| 628 | /// Attempt to evaluate indirect calls to boost their inline cost. |
| 629 | const bool BoostIndirectCalls; |
| 630 | |
| 631 | /// Ignore the threshold when finalizing analysis. |
| 632 | const bool IgnoreThreshold; |
| 633 | |
| 634 | // True if the cost-benefit-analysis-based inliner is enabled. |
| 635 | const bool CostBenefitAnalysisEnabled; |
| 636 | |
| 637 | /// Inlining cost measured in abstract units, accounts for all the |
| 638 | /// instructions expected to be executed for a given function invocation. |
| 639 | /// Instructions that are statically proven to be dead based on call-site |
| 640 | /// arguments are not counted here. |
| 641 | int Cost = 0; |
| 642 | |
| 643 | // The cumulative cost at the beginning of the basic block being analyzed. At |
| 644 | // the end of analyzing each basic block, "Cost - CostAtBBStart" represents |
| 645 | // the size of that basic block. |
| 646 | int CostAtBBStart = 0; |
| 647 | |
| 648 | // The static size of live but cold basic blocks. This is "static" in the |
| 649 | // sense that it's not weighted by profile counts at all. |
| 650 | int ColdSize = 0; |
| 651 | |
| 652 | // Whether inlining is decided by cost-threshold analysis. |
| 653 | bool DecidedByCostThreshold = false; |
| 654 | |
| 655 | // Whether inlining is decided by cost-benefit analysis. |
| 656 | bool DecidedByCostBenefit = false; |
| 657 | |
| 658 | // The cost-benefit pair computed by cost-benefit analysis. |
| 659 | std::optional<CostBenefitPair> CostBenefit; |
| 660 | |
| 661 | bool SingleBB = true; |
| 662 | |
| 663 | unsigned SROACostSavings = 0; |
| 664 | unsigned SROACostSavingsLost = 0; |
| 665 | |
| 666 | /// The mapping of caller Alloca values to their accumulated cost savings. If |
| 667 | /// we have to disable SROA for one of the allocas, this tells us how much |
| 668 | /// cost must be added. |
| 669 | DenseMap<AllocaInst *, int> SROAArgCosts; |
| 670 | |
| 671 | /// Return true if \p Call is a cold callsite. |
| 672 | bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); |
| 673 | |
| 674 | /// Update Threshold based on callsite properties such as callee |
| 675 | /// attributes and callee hotness for PGO builds. The Callee is explicitly |
| 676 | /// passed to support analyzing indirect calls whose target is inferred by |
| 677 | /// analysis. |
| 678 | void updateThreshold(CallBase &Call, Function &Callee); |
| 679 | /// Return a higher threshold if \p Call is a hot callsite. |
| 680 | std::optional<int> getHotCallSiteThreshold(CallBase &Call, |
| 681 | BlockFrequencyInfo *CallerBFI); |
| 682 | |
| 683 | /// Handle a capped 'int' increment for Cost. |
| 684 | void addCost(int64_t Inc) { |
| 685 | Inc = std::clamp<int64_t>(val: Inc, INT_MIN, INT_MAX); |
| 686 | Cost = std::clamp<int64_t>(val: Inc + Cost, INT_MIN, INT_MAX); |
| 687 | } |
| 688 | |
| 689 | void onDisableSROA(AllocaInst *Arg) override { |
| 690 | auto CostIt = SROAArgCosts.find(Val: Arg); |
| 691 | if (CostIt == SROAArgCosts.end()) |
| 692 | return; |
| 693 | addCost(Inc: CostIt->second); |
| 694 | SROACostSavings -= CostIt->second; |
| 695 | SROACostSavingsLost += CostIt->second; |
| 696 | SROAArgCosts.erase(I: CostIt); |
| 697 | } |
| 698 | |
| 699 | void onDisableLoadElimination() override { |
| 700 | addCost(Inc: LoadEliminationCost); |
| 701 | LoadEliminationCost = 0; |
| 702 | } |
| 703 | |
| 704 | bool onCallBaseVisitStart(CallBase &Call) override { |
| 705 | if (std::optional<int> AttrCallThresholdBonus = |
| 706 | getStringFnAttrAsInt(CB&: Call, AttrKind: "call-threshold-bonus" )) |
| 707 | Threshold += *AttrCallThresholdBonus; |
| 708 | |
| 709 | if (std::optional<int> AttrCallCost = |
| 710 | getStringFnAttrAsInt(CB&: Call, AttrKind: "call-inline-cost" )) { |
| 711 | addCost(Inc: *AttrCallCost); |
| 712 | // Prevent further processing of the call since we want to override its |
| 713 | // inline cost, not just add to it. |
| 714 | return false; |
| 715 | } |
| 716 | return true; |
| 717 | } |
| 718 | |
| 719 | void onCallPenalty() override { addCost(Inc: CallPenalty); } |
| 720 | |
| 721 | void onMemAccess() override { addCost(Inc: MemAccessCost); } |
| 722 | |
| 723 | void onCallArgumentSetup(const CallBase &Call) override { |
| 724 | // Pay the price of the argument setup. We account for the average 1 |
| 725 | // instruction per call argument setup here. |
| 726 | addCost(Inc: Call.arg_size() * InstrCost); |
| 727 | } |
| 728 | void onLoadRelativeIntrinsic() override { |
| 729 | // This is normally lowered to 4 LLVM instructions. |
| 730 | addCost(Inc: 3 * InstrCost); |
| 731 | } |
| 732 | void onLoweredCall(Function *F, CallBase &Call, |
| 733 | bool IsIndirectCall) override { |
| 734 | // We account for the average 1 instruction per call argument setup here. |
| 735 | addCost(Inc: Call.arg_size() * InstrCost); |
| 736 | |
| 737 | // If we have a constant that we are calling as a function, we can peer |
| 738 | // through it and see the function target. This happens not infrequently |
| 739 | // during devirtualization and so we want to give it a hefty bonus for |
| 740 | // inlining, but cap that bonus in the event that inlining wouldn't pan out. |
| 741 | // Pretend to inline the function, with a custom threshold. |
| 742 | if (IsIndirectCall && BoostIndirectCalls) { |
| 743 | auto IndirectCallParams = Params; |
| 744 | IndirectCallParams.DefaultThreshold = |
| 745 | InlineConstants::IndirectCallThreshold; |
| 746 | /// FIXME: if InlineCostCallAnalyzer is derived from, this may need |
| 747 | /// to instantiate the derived class. |
| 748 | InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, |
| 749 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 750 | false); |
| 751 | if (CA.analyze().isSuccess()) { |
| 752 | // We were able to inline the indirect call! Subtract the cost from the |
| 753 | // threshold to get the bonus we want to apply, but don't go below zero. |
| 754 | addCost(Inc: -std::max(a: 0, b: CA.getThreshold() - CA.getCost())); |
| 755 | } |
| 756 | } else |
| 757 | // Otherwise simply add the cost for merely making the call. |
| 758 | addCost(Inc: TTI.getInlineCallPenalty(F: CandidateCall.getCaller(), Call, |
| 759 | DefaultCallPenalty: CallPenalty)); |
| 760 | } |
| 761 | |
| 762 | void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 763 | bool DefaultDestUnreachable) override { |
| 764 | // If suitable for a jump table, consider the cost for the table size and |
| 765 | // branch to destination. |
| 766 | // Maximum valid cost increased in this function. |
| 767 | if (JumpTableSize) { |
| 768 | // Suppose a default branch includes one compare and one conditional |
| 769 | // branch if it's reachable. |
| 770 | if (!DefaultDestUnreachable) |
| 771 | addCost(Inc: 2 * InstrCost); |
| 772 | // Suppose a jump table requires one load and one jump instruction. |
| 773 | int64_t JTCost = |
| 774 | static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost; |
| 775 | addCost(Inc: JTCost); |
| 776 | return; |
| 777 | } |
| 778 | |
| 779 | if (NumCaseCluster <= 3) { |
| 780 | // Suppose a comparison includes one compare and one conditional branch. |
| 781 | // We can save one compare and one branch per cluster if the default |
| 782 | // destination is unreachable. |
| 783 | addCost(Inc: (NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost); |
| 784 | return; |
| 785 | } |
| 786 | |
| 787 | int64_t ExpectedNumberOfCompare = |
| 788 | getExpectedNumberOfCompare(NumCaseCluster); |
| 789 | int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost; |
| 790 | |
| 791 | addCost(Inc: SwitchCost); |
| 792 | } |
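| | // Illustrative cost example (using the default InstrCost of 5): a switch |
| | // lowered to a jump table with JumpTableSize = 8 and a reachable default adds |
| | // 2*5 + (8*5 + 2*5) = 60, while a non-jump-table switch with 9 case clusters |
| | // adds getExpectedNumberOfCompare(9) * 2 * 5 = 12 * 2 * 5 = 120. |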
| 793 | |
| 794 | // Parses the inline assembly argument to account for its cost. Inline |
| 795 | // assembly instructions incur higher costs for inlining since they cannot be |
| 796 | // analyzed and optimized. |
| 797 | void onInlineAsm(const InlineAsm &Arg) override { |
| 798 | if (!InlineAsmInstrCost) |
| 799 | return; |
| 800 | SmallVector<StringRef, 4> AsmStrs; |
| 801 | Arg.collectAsmStrs(AsmStrs); |
| 802 | int SectionLevel = 0; |
| 803 | int InlineAsmInstrCount = 0; |
| 804 | for (StringRef AsmStr : AsmStrs) { |
| 805 | // Trim whitespace and strip comments. |
| 806 | StringRef Trimmed = AsmStr.trim(); |
| 807 | size_t hashPos = Trimmed.find(C: '#'); |
| 808 | if (hashPos != StringRef::npos) |
| 809 | Trimmed = Trimmed.substr(Start: 0, N: hashPos); |
| 810 | // Ignore comments. |
| 811 | if (Trimmed.empty()) |
| 812 | continue; |
| 813 | // Filter out the outlined assembly instructions from the cost by keeping |
| 814 | // track of the section level and only accounting for instructions at |
| 815 | // section level of zero. Note there will be duplication in outlined |
| 816 | // sections too, but it is not accounted for in the inlining cost model. |
| 817 | if (Trimmed.starts_with(Prefix: ".pushsection" )) { |
| 818 | ++SectionLevel; |
| 819 | continue; |
| 820 | } |
| 821 | if (Trimmed.starts_with(Prefix: ".popsection" )) { |
| 822 | --SectionLevel; |
| 823 | continue; |
| 824 | } |
| 825 | // Ignore directives and labels. |
| 826 | if (Trimmed.starts_with(Prefix: "." ) || Trimmed.contains(Other: ":" )) |
| 827 | continue; |
| 828 | if (SectionLevel == 0) |
| 829 | ++InlineAsmInstrCount; |
| 830 | } |
| 831 | NumInlineAsmInstructions += InlineAsmInstrCount; |
| 832 | addCost(Inc: InlineAsmInstrCount * InlineAsmInstrCost); |
| 833 | } |
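| | // Illustrative example of the accounting above (assumed asm text): given |
| | //   "mov x0, x1        # copy"  -> counted (section level 0) |
| | //   ".align 4"                  -> ignored (directive) |
| | //   "1:"                        -> ignored (label) |
| | //   ".pushsection .text.cold" / "nop" / ".popsection" |
| | //                               -> the nop is not counted (section level 1) |
| | // so with -inline-asm-instr-cost set to a nonzero value, InlineAsmInstrCount |
| | // would be 1 and the added cost 1 * InlineAsmInstrCost; with the default of 0 |
| | // the function returns early and counts nothing. |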
| 834 | |
| 835 | void onMissedSimplification() override { addCost(Inc: InstrCost); } |
| 836 | |
| 837 | void onInitializeSROAArg(AllocaInst *Arg) override { |
| 838 | assert(Arg != nullptr && |
| 839 | "Should not initialize SROA costs for null value." ); |
| 840 | auto SROAArgCost = TTI.getCallerAllocaCost(CB: &CandidateCall, AI: Arg); |
| 841 | SROACostSavings += SROAArgCost; |
| 842 | SROAArgCosts[Arg] = SROAArgCost; |
| 843 | } |
| 844 | |
| 845 | void onAggregateSROAUse(AllocaInst *SROAArg) override { |
| 846 | auto CostIt = SROAArgCosts.find(Val: SROAArg); |
| 847 | assert(CostIt != SROAArgCosts.end() && |
| 848 | "expected this argument to have a cost" ); |
| 849 | CostIt->second += InstrCost; |
| 850 | SROACostSavings += InstrCost; |
| 851 | } |
| 852 | |
| 853 | void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } |
| 854 | |
| 855 | void onBlockAnalyzed(const BasicBlock *BB) override { |
| 856 | if (CostBenefitAnalysisEnabled) { |
| 857 | // Keep track of the static size of live but cold basic blocks. For now, |
| 858 | // we define a cold basic block to be one that's never executed. |
| 859 | assert(GetBFI && "GetBFI must be available" ); |
| 860 | BlockFrequencyInfo *BFI = &(GetBFI(F)); |
| 861 | assert(BFI && "BFI must be available" ); |
| 862 | auto ProfileCount = BFI->getBlockProfileCount(BB); |
| 863 | if (*ProfileCount == 0) |
| 864 | ColdSize += Cost - CostAtBBStart; |
| 865 | } |
| 866 | |
| 867 | auto *TI = BB->getTerminator(); |
| 868 | // If we had any successors at this point, then post-inlining is likely to |
| 869 | // have them as well. Note that we assume any basic blocks which existed |
| 870 | // due to branches or switches which folded above will also fold after |
| 871 | // inlining. |
| 872 | if (SingleBB && TI->getNumSuccessors() > 1) { |
| 873 | // Take off the bonus we applied to the threshold. |
| 874 | Threshold -= SingleBBBonus; |
| 875 | SingleBB = false; |
| 876 | } |
| 877 | } |
| 878 | |
| 879 | void onInstructionAnalysisStart(const Instruction *I) override { |
| 880 | // This function is called to store the initial cost of inlining before |
| 881 | // the given instruction was assessed. |
| 882 | if (!PrintInstructionComments) |
| 883 | return; |
| 884 | auto &CostDetail = InstructionCostDetailMap[I]; |
| 885 | CostDetail.CostBefore = Cost; |
| 886 | CostDetail.ThresholdBefore = Threshold; |
| 887 | } |
| 888 | |
| 889 | void onInstructionAnalysisFinish(const Instruction *I) override { |
| 890 | // This function is called to find new values of cost and threshold after |
| 891 | // the instruction has been assessed. |
| 892 | if (!PrintInstructionComments) |
| 893 | return; |
| 894 | auto &CostDetail = InstructionCostDetailMap[I]; |
| 895 | CostDetail.CostAfter = Cost; |
| 896 | CostDetail.ThresholdAfter = Threshold; |
| 897 | } |
| 898 | |
| 899 | bool isCostBenefitAnalysisEnabled() { |
| 900 | if (!PSI || !PSI->hasProfileSummary()) |
| 901 | return false; |
| 902 | |
| 903 | if (!GetBFI) |
| 904 | return false; |
| 905 | |
| 906 | if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) { |
| 907 | // Honor the explicit request from the user. |
| 908 | if (!InlineEnableCostBenefitAnalysis) |
| 909 | return false; |
| 910 | } else { |
| 911 | // Otherwise, require instrumentation profile. |
| 912 | if (!PSI->hasInstrumentationProfile()) |
| 913 | return false; |
| 914 | } |
| 915 | |
| 916 | auto *Caller = CandidateCall.getParent()->getParent(); |
| 917 | if (!Caller->getEntryCount()) |
| 918 | return false; |
| 919 | |
| 920 | BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); |
| 921 | if (!CallerBFI) |
| 922 | return false; |
| 923 | |
| 924 | // For now, limit to hot call site. |
| 925 | if (!PSI->isHotCallSite(CB: CandidateCall, BFI: CallerBFI)) |
| 926 | return false; |
| 927 | |
| 928 | // Make sure we have a nonzero entry count. |
| 929 | auto EntryCount = F.getEntryCount(); |
| 930 | if (!EntryCount || !EntryCount->getCount()) |
| 931 | return false; |
| 932 | |
| 933 | BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); |
| 934 | if (!CalleeBFI) |
| 935 | return false; |
| 936 | |
| 937 | return true; |
| 938 | } |
| 939 | |
| 940 | // A helper function to choose between command line override and default. |
| 941 | unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { |
| 942 | if (InlineSavingsMultiplier.getNumOccurrences()) |
| 943 | return InlineSavingsMultiplier; |
| 944 | return TTI.getInliningCostBenefitAnalysisSavingsMultiplier(); |
| 945 | } |
| 946 | |
| 947 | // A helper function to choose between command line override and default. |
| 948 | unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const { |
| 949 | if (InlineSavingsProfitableMultiplier.getNumOccurrences()) |
| 950 | return InlineSavingsProfitableMultiplier; |
| 951 | return TTI.getInliningCostBenefitAnalysisProfitableMultiplier(); |
| 952 | } |
| 953 | |
| 954 | void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) { |
| 955 | if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt( |
| 956 | CB&: CandidateCall, AttrKind: "inline-cycle-savings-for-test" )) { |
| 957 | CycleSavings = *AttrCycleSavings; |
| 958 | } |
| 959 | |
| 960 | if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt( |
| 961 | CB&: CandidateCall, AttrKind: "inline-runtime-cost-for-test" )) { |
| 962 | Size = *AttrRuntimeCost; |
| 963 | } |
| 964 | } |
| 965 | |
| 966 | // Determine whether we should inline the given call site, taking into account |
| 967 | // both the size cost and the cycle savings. Return std::nullopt if we don't |
| 968 | // have sufficient profiling information to decide. |
| 969 | std::optional<bool> costBenefitAnalysis() { |
| 970 | if (!CostBenefitAnalysisEnabled) |
| 971 | return std::nullopt; |
| 972 | |
| 973 | // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 |
| 974 | // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by |
| 975 | // falling back to the cost-based metric. |
| 976 | // TODO: Improve this hacky condition. |
| 977 | if (Threshold == 0) |
| 978 | return std::nullopt; |
| 979 | |
| 980 | assert(GetBFI); |
| 981 | BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); |
| 982 | assert(CalleeBFI); |
| 983 | |
| 984 | // The cycle savings expressed as the sum of InstrCost |
| 985 | // multiplied by the estimated dynamic count of each instruction we can |
| 986 | // avoid. Savings come from the call site cost, such as argument setup and |
| 987 | // the call instruction, as well as the instructions that are folded. |
| 988 | // |
| 989 | // We use 128-bit APInt here to avoid potential overflow. This variable |
| 990 | // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case |
| 991 | // assumes that we can avoid or fold a billion instructions, each with a |
| 992 | // profile count of 10^^15 -- roughly the number of cycles for a 24-hour |
| 993 | // period on a 4GHz machine. |
| 994 | APInt CycleSavings(128, 0); |
| 995 | |
| 996 | for (auto &BB : F) { |
| 997 | APInt CurrentSavings(128, 0); |
| 998 | for (auto &I : BB) { |
| 999 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: &I)) { |
| 1000 | // Count a conditional branch as savings if it becomes unconditional. |
| 1001 | if (BI->isConditional() && |
| 1002 | getSimplifiedValue<ConstantInt>(V: BI->getCondition())) { |
| 1003 | CurrentSavings += InstrCost; |
| 1004 | } |
| 1005 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: &I)) { |
| 1006 | if (getSimplifiedValue<ConstantInt>(V: SI->getCondition())) |
| 1007 | CurrentSavings += InstrCost; |
| 1008 | } else if (Value *V = dyn_cast<Value>(Val: &I)) { |
| 1009 | // Count an instruction as savings if we can fold it. |
| 1010 | if (SimplifiedValues.count(Val: V)) { |
| 1011 | CurrentSavings += InstrCost; |
| 1012 | } |
| 1013 | } |
| 1014 | } |
| 1015 | |
| 1016 | auto ProfileCount = CalleeBFI->getBlockProfileCount(BB: &BB); |
| 1017 | CurrentSavings *= *ProfileCount; |
| 1018 | CycleSavings += CurrentSavings; |
| 1019 | } |
| 1020 | |
| 1021 | // Compute the cycle savings per call. |
| 1022 | auto EntryProfileCount = F.getEntryCount(); |
| 1023 | assert(EntryProfileCount && EntryProfileCount->getCount()); |
| 1024 | auto EntryCount = EntryProfileCount->getCount(); |
| 1025 | CycleSavings += EntryCount / 2; |
| 1026 | CycleSavings = CycleSavings.udiv(RHS: EntryCount); |
| 1027 | |
| 1028 | // Compute the total savings for the call site. |
| 1029 | auto *CallerBB = CandidateCall.getParent(); |
| 1030 | BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); |
| 1031 | CycleSavings += getCallsiteCost(TTI, Call: this->CandidateCall, DL); |
| 1032 | CycleSavings *= *CallerBFI->getBlockProfileCount(BB: CallerBB); |
| 1033 | |
| 1034 | // Remove the cost of the cold basic blocks to model the runtime cost more |
| 1035 | // accurately. Both machine block placement and function splitting could |
| 1036 | // place cold blocks further from hot blocks. |
| 1037 | int Size = Cost - ColdSize; |
| 1038 | |
| 1039 | // Allow tiny callees to be inlined regardless of whether they meet the |
| 1040 | // savings threshold. |
| 1041 | Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; |
| 1042 | |
| 1043 | OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size); |
| 1044 | CostBenefit.emplace(args: APInt(128, Size), args&: CycleSavings); |
| 1045 | |
| 1046 | // Let R be the ratio of CycleSavings to Size. We accept the inlining |
| 1047 | // opportunity if R is really high and reject if R is really low. If R is |
| 1048 | // somewhere in the middle, we fall back to the cost-based analysis. |
| 1049 | // |
| 1050 | // Specifically, let R = CycleSavings / Size, we accept the inlining |
| 1051 | // opportunity if: |
| 1052 | // |
| 1053 | // PSI->getOrCompHotCountThreshold() |
| 1054 | // R > ------------------------------------------------- |
| 1055 | // getInliningCostBenefitAnalysisSavingsMultiplier() |
| 1056 | // |
| 1057 | // and reject the inlining opportunity if: |
| 1058 | // |
| 1059 | // PSI->getOrCompHotCountThreshold() |
| 1060 | // R <= ---------------------------------------------------- |
| 1061 | // getInliningCostBenefitAnalysisProfitableMultiplier() |
| 1062 | // |
| 1063 | // Otherwise, we fall back to the cost-based analysis. |
| 1064 | // |
| 1065 | // Implementation-wise, use multiplication (CycleSavings * Multiplier, |
| 1066 | // HotCountThreshold * Size) rather than division to avoid precision loss. |
| 1067 | APInt Threshold(128, PSI->getOrCompHotCountThreshold()); |
| 1068 | Threshold *= Size; |
| 1069 | |
| 1070 | APInt UpperBoundCycleSavings = CycleSavings; |
| 1071 | UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier(); |
| 1072 | if (UpperBoundCycleSavings.uge(RHS: Threshold)) |
| 1073 | return true; |
| 1074 | |
| 1075 | APInt LowerBoundCycleSavings = CycleSavings; |
| 1076 | LowerBoundCycleSavings *= |
| 1077 | getInliningCostBenefitAnalysisProfitableMultiplier(); |
| 1078 | if (LowerBoundCycleSavings.ult(RHS: Threshold)) |
| 1079 | return false; |
| 1080 | |
| 1081 | // Otherwise, fall back to the cost-based analysis. |
| 1082 | return std::nullopt; |
| 1083 | } |
| 1084 | |
| 1085 | InlineResult finalizeAnalysis() override { |
| 1086 | // Loops generally act a lot like calls in that they act like barriers to |
| 1087 | // movement, require a certain amount of setup, etc. So when optimising for |
| 1088 | // size, we penalise any call sites that perform loops. We do this after all |
| 1089 | // other costs here, so will likely only be dealing with relatively small |
| 1090 | // functions (and hence DT and LI will hopefully be cheap). |
| 1091 | auto *Caller = CandidateCall.getFunction(); |
| 1092 | if (Caller->hasMinSize()) { |
| 1093 | DominatorTree DT(F); |
| 1094 | LoopInfo LI(DT); |
| 1095 | int NumLoops = 0; |
| 1096 | for (Loop *L : LI) { |
| 1097 | // Ignore loops that will not be executed |
| 1098 | if (DeadBlocks.count(Ptr: L->getHeader())) |
| 1099 | continue; |
| 1100 | NumLoops++; |
| 1101 | } |
| 1102 | addCost(Inc: NumLoops * InlineConstants::LoopPenalty); |
| 1103 | } |
| 1104 | |
| 1105 | // We applied the maximum possible vector bonus at the beginning. Now, |
| 1106 | // subtract the excess bonus, if any, from the Threshold before |
| 1107 | // comparing against Cost. |
| 1108 | if (NumVectorInstructions <= NumInstructions / 10) |
| 1109 | Threshold -= VectorBonus; |
| 1110 | else if (NumVectorInstructions <= NumInstructions / 2) |
| 1111 | Threshold -= VectorBonus / 2; |
| 1112 | |
| 1113 | if (std::optional<int> AttrCost = |
| 1114 | getStringFnAttrAsInt(CB&: CandidateCall, AttrKind: "function-inline-cost" )) |
| 1115 | Cost = *AttrCost; |
| 1116 | |
| 1117 | if (std::optional<int> AttrCostMult = getStringFnAttrAsInt( |
| 1118 | CB&: CandidateCall, |
| 1119 | AttrKind: InlineConstants::FunctionInlineCostMultiplierAttributeName)) |
| 1120 | Cost *= *AttrCostMult; |
| 1121 | |
| 1122 | if (std::optional<int> AttrThreshold = |
| 1123 | getStringFnAttrAsInt(CB&: CandidateCall, AttrKind: "function-inline-threshold" )) |
| 1124 | Threshold = *AttrThreshold; |
| 1125 | |
| 1126 | if (auto Result = costBenefitAnalysis()) { |
| 1127 | DecidedByCostBenefit = true; |
| 1128 | if (*Result) |
| 1129 | return InlineResult::success(); |
| 1130 | else |
| 1131 | return InlineResult::failure(Reason: "Cost over threshold." ); |
| 1132 | } |
| 1133 | |
| 1134 | if (IgnoreThreshold) |
| 1135 | return InlineResult::success(); |
| 1136 | |
| 1137 | DecidedByCostThreshold = true; |
| 1138 | return Cost < std::max(a: 1, b: Threshold) |
| 1139 | ? InlineResult::success() |
| 1140 | : InlineResult::failure(Reason: "Cost over threshold." ); |
| 1141 | } |
| 1142 | |
| 1143 | bool shouldStop() override { |
| 1144 | if (IgnoreThreshold || ComputeFullInlineCost) |
| 1145 | return false; |
| 1146 | // Bail out the moment we cross the threshold. This means we'll under-count |
| 1147 | // the cost, but only when undercounting doesn't matter. |
| 1148 | if (Cost < Threshold) |
| 1149 | return false; |
| 1150 | DecidedByCostThreshold = true; |
| 1151 | return true; |
| 1152 | } |
| 1153 | |
| 1154 | void onLoadEliminationOpportunity() override { |
| 1155 | LoadEliminationCost += InstrCost; |
| 1156 | } |
| 1157 | |
| 1158 | InlineResult onAnalysisStart() override { |
| 1159 | // Perform some tweaks to the cost and threshold based on the direct |
| 1160 | // callsite information. |
| 1161 | |
| 1162 | // We want to more aggressively inline vector-dense kernels, so up the |
| 1163 | // threshold, and we'll lower it if the % of vector instructions gets too |
| 1164 | // low. Note that these bonuses are somewhat arbitrary and evolved over |
| 1165 | // time by accident as much as because they are principled bonuses. |
| 1166 | // |
| 1167 | // FIXME: It would be nice to remove all such bonuses. At least it would be |
| 1168 | // nice to base the bonus values on something more scientific. |
| 1169 | assert(NumInstructions == 0); |
| 1170 | assert(NumVectorInstructions == 0); |
| 1171 | |
| 1172 | // Update the threshold based on callsite properties |
| 1173 | updateThreshold(Call&: CandidateCall, Callee&: F); |
| 1174 | |
| 1175 | // While Threshold depends on commandline options that can take negative |
| 1176 | // values, we want to enforce the invariant that the computed threshold and |
| 1177 | // bonuses are non-negative. |
| 1178 | assert(Threshold >= 0); |
| 1179 | assert(SingleBBBonus >= 0); |
| 1180 | assert(VectorBonus >= 0); |
| 1181 | |
| 1182 | // Speculatively apply all possible bonuses to Threshold. If cost exceeds |
| 1183 | // this Threshold any time, and cost cannot decrease, we can stop processing |
| 1184 | // the rest of the function body. |
| 1185 | Threshold += (SingleBBBonus + VectorBonus); |
| 1186 | |
| 1187 | // Give out bonuses for the callsite, as the instructions setting them up |
| 1188 | // will be gone after inlining. |
| 1189 | addCost(Inc: -getCallsiteCost(TTI, Call: this->CandidateCall, DL)); |
| 1190 | |
| 1191 | // If this function uses the coldcc calling convention, prefer not to inline |
| 1192 | // it. |
| 1193 | if (F.getCallingConv() == CallingConv::Cold) |
| 1194 | addCost(Inc: InlineConstants::ColdccPenalty); |
| 1195 | |
| 1196 | LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n" ); |
| 1197 | |
| 1198 | // Check if we're done. This can happen due to bonuses and penalties. |
| 1199 | if (Cost >= Threshold && !ComputeFullInlineCost) |
| 1200 | return InlineResult::failure(Reason: "high cost" ); |
| 1201 | |
| 1202 | return InlineResult::success(); |
| 1203 | } |
| 1204 | |
| 1205 | public: |
| 1206 | InlineCostCallAnalyzer( |
| 1207 | Function &Callee, CallBase &Call, const InlineParams &Params, |
| 1208 | const TargetTransformInfo &TTI, |
| 1209 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 1210 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr, |
| 1211 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr, |
| 1212 | ProfileSummaryInfo *PSI = nullptr, |
| 1213 | OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, |
| 1214 | bool IgnoreThreshold = false, |
| 1215 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = |
| 1216 | nullptr) |
| 1217 | : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI, |
| 1218 | ORE, GetEphValuesCache), |
| 1219 | ComputeFullInlineCost(OptComputeFullInlineCost || |
| 1220 | Params.ComputeFullInlineCost || ORE || |
| 1221 | isCostBenefitAnalysisEnabled()), |
| 1222 | Params(Params), Threshold(Params.DefaultThreshold), |
| 1223 | BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), |
| 1224 | CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), |
| 1225 | Writer(this) { |
| 1226 | AllowRecursiveCall = *Params.AllowRecursiveCall; |
| 1227 | } |
| 1228 | |
| 1229 | /// Annotation Writer for instruction details |
| 1230 | InlineCostAnnotationWriter Writer; |
| 1231 | |
| 1232 | void dump(); |
| 1233 | |
| 1234 | // Prints the same analysis as dump(), but its definition is not dependent |
| 1235 | // on the build. |
| 1236 | void print(raw_ostream &OS); |
| 1237 | |
| 1238 | std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) { |
| 1239 | auto It = InstructionCostDetailMap.find(Val: I); |
| 1240 | if (It != InstructionCostDetailMap.end()) |
| 1241 | return It->second; |
| 1242 | return std::nullopt; |
| 1243 | } |
| 1244 | |
| 1245 | ~InlineCostCallAnalyzer() override = default; |
| 1246 | int getThreshold() const { return Threshold; } |
| 1247 | int getCost() const { return Cost; } |
| 1248 | int getStaticBonusApplied() const { return StaticBonusApplied; } |
| 1249 | std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; } |
| 1250 | bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } |
| 1251 | bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; } |
| 1252 | }; |
| 1253 | |
| 1254 | // Return true if CB is the sole call to local function Callee. |
| 1255 | static bool isSoleCallToLocalFunction(const CallBase &CB, |
| 1256 | const Function &Callee) { |
| 1257 | return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() && |
| 1258 | &Callee == CB.getCalledFunction(); |
| 1259 | } |
| 1260 | |
| 1261 | class InlineCostFeaturesAnalyzer final : public CallAnalyzer { |
| 1262 | private: |
| 1263 | InlineCostFeatures Cost = {}; |
| 1264 | |
| 1265 | // FIXME: These constants are taken from the heuristic-based cost visitor. |
| 1266 | // These should be removed entirely in a later revision to avoid reliance on |
| 1267 | // heuristics in the ML inliner. |
| 1268 | static constexpr int JTCostMultiplier = 2; |
| 1269 | static constexpr int CaseClusterCostMultiplier = 2; |
| 1270 | static constexpr int SwitchDefaultDestCostMultiplier = 2; |
| 1271 | static constexpr int SwitchCostMultiplier = 2; |
| 1272 | |
| 1273 | // FIXME: These are taken from the heuristic-based cost visitor: we should |
| 1274 | // eventually abstract these to the CallAnalyzer to avoid duplication. |
| 1275 | unsigned SROACostSavingOpportunities = 0; |
| 1276 | int VectorBonus = 0; |
| 1277 | int SingleBBBonus = 0; |
| 1278 | int Threshold = 5; |
| 1279 | |
| 1280 | DenseMap<AllocaInst *, unsigned> SROACosts; |
| 1281 | |
| 1282 | void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) { |
| 1283 | Cost[static_cast<size_t>(Feature)] += Delta; |
| 1284 | } |
| 1285 | |
| 1286 | void set(InlineCostFeatureIndex Feature, int64_t Value) { |
| 1287 | Cost[static_cast<size_t>(Feature)] = Value; |
| 1288 | } |
| 1289 | |
| 1290 | void onDisableSROA(AllocaInst *Arg) override { |
| 1291 | auto CostIt = SROACosts.find(Val: Arg); |
| 1292 | if (CostIt == SROACosts.end()) |
| 1293 | return; |
| 1294 | |
| 1295 | increment(Feature: InlineCostFeatureIndex::sroa_losses, Delta: CostIt->second); |
| 1296 | SROACostSavingOpportunities -= CostIt->second; |
| 1297 | SROACosts.erase(I: CostIt); |
| 1298 | } |
| 1299 | |
| 1300 | void onDisableLoadElimination() override { |
| 1301 | set(Feature: InlineCostFeatureIndex::load_elimination, Value: 1); |
| 1302 | } |
| 1303 | |
| 1304 | void onCallPenalty() override { |
| 1305 | increment(Feature: InlineCostFeatureIndex::call_penalty, Delta: CallPenalty); |
| 1306 | } |
| 1307 | |
| 1308 | void onCallArgumentSetup(const CallBase &Call) override { |
| 1309 | increment(Feature: InlineCostFeatureIndex::call_argument_setup, |
| 1310 | Delta: Call.arg_size() * InstrCost); |
| 1311 | } |
| 1312 | |
| 1313 | void onLoadRelativeIntrinsic() override { |
| 1314 | increment(Feature: InlineCostFeatureIndex::load_relative_intrinsic, Delta: 3 * InstrCost); |
| 1315 | } |
| 1316 | |
| 1317 | void onLoweredCall(Function *F, CallBase &Call, |
| 1318 | bool IsIndirectCall) override { |
| 1319 | increment(Feature: InlineCostFeatureIndex::lowered_call_arg_setup, |
| 1320 | Delta: Call.arg_size() * InstrCost); |
| 1321 | |
| 1322 | if (IsIndirectCall) { |
| 1323 | InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0, |
| 1324 | /*HintThreshold*/ {}, |
| 1325 | /*ColdThreshold*/ {}, |
| 1326 | /*OptSizeThreshold*/ {}, |
| 1327 | /*OptMinSizeThreshold*/ {}, |
| 1328 | /*HotCallSiteThreshold*/ {}, |
| 1329 | /*LocallyHotCallSiteThreshold*/ {}, |
| 1330 | /*ColdCallSiteThreshold*/ {}, |
| 1331 | /*ComputeFullInlineCost*/ true, |
| 1332 | /*EnableDeferral*/ true}; |
| 1333 | IndirectCallParams.DefaultThreshold = |
| 1334 | InlineConstants::IndirectCallThreshold; |
| 1335 | |
| 1336 | InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, |
| 1337 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 1338 | false, true); |
| 1339 | if (CA.analyze().isSuccess()) { |
| 1340 | increment(Feature: InlineCostFeatureIndex::nested_inline_cost_estimate, |
| 1341 | Delta: CA.getCost()); |
| 1342 | increment(Feature: InlineCostFeatureIndex::nested_inlines, Delta: 1); |
| 1343 | } |
| 1344 | } else { |
| 1345 | onCallPenalty(); |
| 1346 | } |
| 1347 | } |
| 1348 | |
| 1349 | void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster, |
| 1350 | bool DefaultDestUnreachable) override { |
| 1351 | if (JumpTableSize) { |
| 1352 | if (!DefaultDestUnreachable) |
| 1353 | increment(Feature: InlineCostFeatureIndex::switch_default_dest_penalty, |
| 1354 | Delta: SwitchDefaultDestCostMultiplier * InstrCost); |
| 1355 | int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost + |
| 1356 | JTCostMultiplier * InstrCost; |
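|      | // For illustration (hypothetical numbers; an InstrCost of 5 is an assumption
|      | // made for this sketch, not something this function fixes): a 16-entry jump
|      | // table would add JTCost = 16 * 5 + 2 * 5 = 90 to the jump_table_penalty
|      | // feature.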
| 1357 | increment(Feature: InlineCostFeatureIndex::jump_table_penalty, Delta: JTCost); |
| 1358 | return; |
| 1359 | } |
| 1360 | |
| 1361 | if (NumCaseCluster <= 3) { |
| 1362 | increment(Feature: InlineCostFeatureIndex::case_cluster_penalty, |
| 1363 | Delta: (NumCaseCluster - DefaultDestUnreachable) * |
| 1364 | CaseClusterCostMultiplier * InstrCost); |
| 1365 | return; |
| 1366 | } |
| 1367 | |
| 1368 | int64_t ExpectedNumberOfCompare = |
| 1369 | getExpectedNumberOfCompare(NumCaseCluster); |
| 1370 | |
| 1371 | int64_t SwitchCost = |
| 1372 | ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost; |
| 1373 | increment(Feature: InlineCostFeatureIndex::switch_penalty, Delta: SwitchCost); |
| 1374 | } |
| 1375 | |
| 1376 | void onMissedSimplification() override { |
| 1377 | increment(Feature: InlineCostFeatureIndex::unsimplified_common_instructions, |
| 1378 | Delta: InstrCost); |
| 1379 | } |
| 1380 | |
| 1381 | void onInitializeSROAArg(AllocaInst *Arg) override { |
| 1382 | auto SROAArgCost = TTI.getCallerAllocaCost(CB: &CandidateCall, AI: Arg); |
| 1383 | SROACosts[Arg] = SROAArgCost; |
| 1384 | SROACostSavingOpportunities += SROAArgCost; |
| 1385 | } |
| 1386 | |
| 1387 | void onAggregateSROAUse(AllocaInst *Arg) override { |
| 1388 | SROACosts.find(Val: Arg)->second += InstrCost; |
| 1389 | SROACostSavingOpportunities += InstrCost; |
| 1390 | } |
| 1391 | |
| 1392 | void onBlockAnalyzed(const BasicBlock *BB) override { |
| 1393 | if (BB->getTerminator()->getNumSuccessors() > 1) |
| 1394 | set(Feature: InlineCostFeatureIndex::is_multiple_blocks, Value: 1); |
| 1395 | Threshold -= SingleBBBonus; |
| 1396 | } |
| 1397 | |
| 1398 | InlineResult finalizeAnalysis() override { |
| 1399 | auto *Caller = CandidateCall.getFunction(); |
| 1400 | if (Caller->hasMinSize()) { |
| 1401 | DominatorTree DT(F); |
| 1402 | LoopInfo LI(DT); |
| 1403 | for (Loop *L : LI) { |
| 1404 | // Ignore loops that will not be executed |
| 1405 | if (DeadBlocks.count(Ptr: L->getHeader())) |
| 1406 | continue; |
| 1407 | increment(Feature: InlineCostFeatureIndex::num_loops, |
| 1408 | Delta: InlineConstants::LoopPenalty); |
| 1409 | } |
| 1410 | } |
| 1411 | set(Feature: InlineCostFeatureIndex::dead_blocks, Value: DeadBlocks.size()); |
| 1412 | set(Feature: InlineCostFeatureIndex::simplified_instructions, |
| 1413 | Value: NumInstructionsSimplified); |
| 1414 | set(Feature: InlineCostFeatureIndex::constant_args, Value: NumConstantArgs); |
| 1415 | set(Feature: InlineCostFeatureIndex::constant_offset_ptr_args, |
| 1416 | Value: NumConstantOffsetPtrArgs); |
| 1417 | set(Feature: InlineCostFeatureIndex::sroa_savings, Value: SROACostSavingOpportunities); |
| 1418 | |
| 1419 | if (NumVectorInstructions <= NumInstructions / 10) |
| 1420 | Threshold -= VectorBonus; |
| 1421 | else if (NumVectorInstructions <= NumInstructions / 2) |
| 1422 | Threshold -= VectorBonus / 2; |
| 1423 | |
| 1424 | set(Feature: InlineCostFeatureIndex::threshold, Value: Threshold); |
| 1425 | |
| 1426 | return InlineResult::success(); |
| 1427 | } |
| 1428 | |
| 1429 | bool shouldStop() override { return false; } |
| 1430 | |
| 1431 | void onLoadEliminationOpportunity() override { |
| 1432 | increment(Feature: InlineCostFeatureIndex::load_elimination, Delta: 1); |
| 1433 | } |
| 1434 | |
| 1435 | InlineResult onAnalysisStart() override { |
| 1436 | increment(Feature: InlineCostFeatureIndex::callsite_cost, |
| 1437 | Delta: -1 * getCallsiteCost(TTI, Call: this->CandidateCall, DL)); |
| 1438 | |
| 1439 | set(Feature: InlineCostFeatureIndex::cold_cc_penalty, |
| 1440 | Value: (F.getCallingConv() == CallingConv::Cold)); |
| 1441 | |
| 1442 | set(Feature: InlineCostFeatureIndex::last_call_to_static_bonus, |
| 1443 | Value: isSoleCallToLocalFunction(CB: CandidateCall, Callee: F)); |
| 1444 | |
| 1445 | // FIXME: we shouldn't repeat this logic in both the Features and Cost |
| 1446 | // analyzer - instead, we should abstract it to a common method in the |
| 1447 | // CallAnalyzer |
| 1448 | int SingleBBBonusPercent = 50; |
| 1449 | int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); |
| 1450 | Threshold += TTI.adjustInliningThreshold(CB: &CandidateCall); |
| 1451 | Threshold *= TTI.getInliningThresholdMultiplier(); |
| 1452 | SingleBBBonus = Threshold * SingleBBBonusPercent / 100; |
| 1453 | VectorBonus = Threshold * VectorBonusPercent / 100; |
| 1454 | Threshold += (SingleBBBonus + VectorBonus); |
| 1455 | |
| 1456 | return InlineResult::success(); |
| 1457 | } |
| 1458 | |
| 1459 | public: |
| 1460 | InlineCostFeaturesAnalyzer( |
| 1461 | const TargetTransformInfo &TTI, |
| 1462 | function_ref<AssumptionCache &(Function &)> &GetAssumptionCache, |
| 1463 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 1464 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 1465 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, |
| 1466 | CallBase &Call) |
| 1467 | : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, |
| 1468 | PSI) {} |
| 1469 | |
| 1470 | const InlineCostFeatures &features() const { return Cost; } |
| 1471 | }; |
| 1472 | |
| 1473 | } // namespace |
| 1474 | |
| 1475 | /// Test whether the given value is an Alloca-derived function argument. |
| 1476 | bool CallAnalyzer::isAllocaDerivedArg(Value *V) { |
| 1477 | return SROAArgValues.count(Val: V); |
| 1478 | } |
| 1479 | |
| 1480 | void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { |
| 1481 | onDisableSROA(Arg: SROAArg); |
| 1482 | EnabledSROAAllocas.erase(V: SROAArg); |
| 1483 | disableLoadElimination(); |
| 1484 | } |
| 1485 | |
| 1486 | void InlineCostAnnotationWriter::emitInstructionAnnot( |
| 1487 | const Instruction *I, formatted_raw_ostream &OS) { |
| 1488 | // The cost of inlining the given instruction is always printed.
| 1489 | // The threshold delta is printed only when it is non-zero, which happens
| 1490 | // when we decide to grant a bonus at a particular instruction.
| 1491 | std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I); |
| 1492 | if (!Record) |
| 1493 | OS << "; No analysis for the instruction" ; |
| 1494 | else { |
| 1495 | OS << "; cost before = " << Record->CostBefore |
| 1496 | << ", cost after = " << Record->CostAfter |
| 1497 | << ", threshold before = " << Record->ThresholdBefore |
| 1498 | << ", threshold after = " << Record->ThresholdAfter << ", " ; |
| 1499 | OS << "cost delta = " << Record->getCostDelta(); |
| 1500 | if (Record->hasThresholdChanged()) |
| 1501 | OS << ", threshold delta = " << Record->getThresholdDelta(); |
| 1502 | } |
| 1503 | auto *V = ICCA->getSimplifiedValueUnchecked(V: const_cast<Instruction *>(I)); |
| 1504 | if (V) { |
| 1505 | OS << ", simplified to " ; |
| 1506 | V->print(O&: OS, IsForDebug: true); |
| 1507 | if (auto *VI = dyn_cast<Instruction>(Val: V)) { |
| 1508 | if (VI->getFunction() != I->getFunction()) |
| 1509 | OS << " (caller instruction)" ; |
| 1510 | } else if (auto *VArg = dyn_cast<Argument>(Val: V)) { |
| 1511 | if (VArg->getParent() != I->getFunction()) |
| 1512 | OS << " (caller argument)" ; |
| 1513 | } |
| 1514 | } |
| 1515 | OS << "\n" ; |
| 1516 | } |
| 1517 | |
| 1518 | /// If 'V' maps to a SROA candidate, disable SROA for it. |
| 1519 | void CallAnalyzer::disableSROA(Value *V) { |
| 1520 | if (auto *SROAArg = getSROAArgForValueOrNull(V)) { |
| 1521 | disableSROAForArg(SROAArg); |
| 1522 | } |
| 1523 | } |
| 1524 | |
| 1525 | void CallAnalyzer::disableLoadElimination() { |
| 1526 | if (EnableLoadElimination) { |
| 1527 | onDisableLoadElimination(); |
| 1528 | EnableLoadElimination = false; |
| 1529 | } |
| 1530 | } |
| 1531 | |
| 1532 | /// Accumulate a constant GEP offset into an APInt if possible. |
| 1533 | /// |
| 1534 | /// Returns false if unable to compute the offset for any reason. Respects any |
| 1535 | /// simplified values known during the analysis of this callsite. |
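|      | /// For illustration (a hypothetical GEP, assuming a typical data layout):
|      | ///   %p = getelementptr inbounds {i32, i64}, ptr %base, i32 0, i32 1
|      | /// contributes the field offset of element 1 (commonly 8 bytes), so Offset
|      | /// would accumulate to 8.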
| 1536 | bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { |
| 1537 | unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(Ty: GEP.getType()); |
| 1538 | assert(IntPtrWidth == Offset.getBitWidth()); |
| 1539 | |
| 1540 | for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); |
| 1541 | GTI != GTE; ++GTI) { |
| 1542 | ConstantInt *OpC = |
| 1543 | getDirectOrSimplifiedValue<ConstantInt>(V: GTI.getOperand()); |
| 1544 | if (!OpC) |
| 1545 | return false; |
| 1546 | if (OpC->isZero()) |
| 1547 | continue; |
| 1548 | |
| 1549 | // Handle a struct index, which adds its field offset to the pointer. |
| 1550 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
| 1551 | unsigned ElementIdx = OpC->getZExtValue(); |
| 1552 | const StructLayout *SL = DL.getStructLayout(Ty: STy); |
| 1553 | Offset += APInt(IntPtrWidth, SL->getElementOffset(Idx: ElementIdx)); |
| 1554 | continue; |
| 1555 | } |
| 1556 | |
| 1557 | APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL)); |
| 1558 | Offset += OpC->getValue().sextOrTrunc(width: IntPtrWidth) * TypeSize; |
| 1559 | } |
| 1560 | return true; |
| 1561 | } |
| 1562 | |
| 1563 | /// Use TTI to check whether a GEP is free. |
| 1564 | /// |
| 1565 | /// Respects any simplified values known during the analysis of this callsite. |
| 1566 | bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { |
| 1567 | SmallVector<Value *, 4> Operands; |
| 1568 | Operands.push_back(Elt: GEP.getOperand(i_nocapture: 0)); |
| 1569 | for (const Use &Op : GEP.indices()) |
| 1570 | if (Constant *SimpleOp = getSimplifiedValue<Constant>(V: Op)) |
| 1571 | Operands.push_back(Elt: SimpleOp); |
| 1572 | else |
| 1573 | Operands.push_back(Elt: Op); |
| 1574 | return TTI.getInstructionCost(U: &GEP, Operands, |
| 1575 | CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1576 | TargetTransformInfo::TCC_Free; |
| 1577 | } |
| 1578 | |
| 1579 | bool CallAnalyzer::visitAlloca(AllocaInst &I) { |
| 1580 | disableSROA(V: I.getOperand(i_nocapture: 0)); |
| 1581 | |
| 1582 | // Check whether inlining will turn a dynamic alloca into a static |
| 1583 | // alloca and handle that case. |
| 1584 | if (I.isArrayAllocation()) { |
| 1585 | Constant *Size = getSimplifiedValue<Constant>(V: I.getArraySize()); |
| 1586 | if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Val: Size)) { |
| 1587 | // Sometimes a dynamic alloca could be converted into a static alloca |
| 1588 | // after this constant prop, and become a huge static alloca on an |
| 1589 | // unconditional CFG path. Avoid inlining if this is going to happen above |
| 1590 | // a threshold. |
| 1591 | // FIXME: If the threshold is removed or lowered too much, we could end up |
| 1592 | // being too pessimistic and prevent inlining non-problematic code. This |
| 1593 | // could result in unintended perf regressions. A better overall strategy |
| 1594 | // is needed to track stack usage during inlining. |
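|      | // For illustration (a hypothetical callee, not taken from this file): with
|      | //   %buf = alloca i32, i32 %n
|      | // where %n simplifies to 1024 at this call site, inlining would produce a
|      | // 4096-byte static alloca; if the accumulated size exceeds the limit below,
|      | // we conservatively treat it as a dynamic alloca instead.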
| 1595 | Type *Ty = I.getAllocatedType(); |
| 1596 | AllocatedSize = SaturatingMultiplyAdd( |
| 1597 | X: AllocSize->getLimitedValue(), |
| 1598 | Y: DL.getTypeAllocSize(Ty).getKnownMinValue(), A: AllocatedSize); |
| 1599 | if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) |
| 1600 | HasDynamicAlloca = true; |
| 1601 | return false; |
| 1602 | } |
| 1603 | } |
| 1604 | |
| 1605 | if (I.isStaticAlloca()) { |
| 1606 | // Accumulate the allocated size if constant and executed once. |
| 1607 | // Note: if AllocSize is a vscale value, this is an underestimate of the
| 1608 | // allocated size, and the allocation also incurs some of the cost of a
| 1609 | // dynamic alloca, even though it is recorded here as a constant-size alloca.
| 1610 | TypeSize AllocSize = I.getAllocationSize(DL).value_or(u: TypeSize::getZero()); |
| 1611 | AllocatedSize = SaturatingAdd(X: AllocSize.getKnownMinValue(), Y: AllocatedSize); |
| 1612 | } else { |
| 1613 | // FIXME: This is overly conservative. Dynamic allocas are inefficient for |
| 1614 | // a variety of reasons, and so we would like to not inline them into |
| 1615 | // functions which don't currently have a dynamic alloca. This simply |
| 1616 | // disables inlining altogether in the presence of a dynamic alloca. |
| 1617 | HasDynamicAlloca = true; |
| 1618 | } |
| 1619 | |
| 1620 | return false; |
| 1621 | } |
| 1622 | |
| 1623 | bool CallAnalyzer::visitPHI(PHINode &I) { |
| 1624 | // FIXME: We need to propagate SROA *disabling* through phi nodes, even |
| 1625 | // though we don't want to propagate its bonuses. The idea is to disable
| 1626 | // SROA if it *might* be used in an inappropriate manner. |
| 1627 | |
| 1628 | // Phi nodes are always zero-cost. |
| 1629 | // FIXME: Pointer sizes may differ between different address spaces, so do we |
| 1630 | // need to use the correct address space in the call to getPointerSizeInBits here?
| 1631 | // Or could we skip the getPointerSizeInBits call completely? As far as I can |
| 1632 | // see the ZeroOffset is used as a dummy value, so we can probably use any |
| 1633 | // bit width for the ZeroOffset? |
| 1634 | APInt ZeroOffset = APInt::getZero(numBits: DL.getPointerSizeInBits(AS: 0)); |
| 1635 | bool CheckSROA = I.getType()->isPointerTy(); |
| 1636 | |
| 1637 | // Track the constant or pointer with constant offset we've seen so far. |
| 1638 | Constant *FirstC = nullptr; |
| 1639 | std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset}; |
| 1640 | Value *FirstV = nullptr; |
| 1641 | |
| 1642 | for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { |
| 1643 | BasicBlock *Pred = I.getIncomingBlock(i); |
| 1644 | // If the incoming block is dead, skip the incoming block. |
| 1645 | if (DeadBlocks.count(Ptr: Pred)) |
| 1646 | continue; |
| 1647 | // If the parent block of phi is not the known successor of the incoming |
| 1648 | // block, skip the incoming block. |
| 1649 | BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; |
| 1650 | if (KnownSuccessor && KnownSuccessor != I.getParent()) |
| 1651 | continue; |
| 1652 | |
| 1653 | Value *V = I.getIncomingValue(i); |
| 1654 | // If the incoming value is this phi itself, skip the incoming value. |
| 1655 | if (&I == V) |
| 1656 | continue; |
| 1657 | |
| 1658 | Constant *C = getDirectOrSimplifiedValue<Constant>(V); |
| 1659 | |
| 1660 | std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset}; |
| 1661 | if (!C && CheckSROA) |
| 1662 | BaseAndOffset = ConstantOffsetPtrs.lookup(Val: V); |
| 1663 | |
| 1664 | if (!C && !BaseAndOffset.first) |
| 1665 | // The incoming value is neither a constant nor a pointer with constant |
| 1666 | // offset; exit early.
| 1667 | return true; |
| 1668 | |
| 1669 | if (FirstC) { |
| 1670 | if (FirstC == C) |
| 1671 | // If we've seen a constant incoming value before and it is the same |
| 1672 | // constant we see this time, continue checking the next incoming value. |
| 1673 | continue; |
| 1674 | // Otherwise early exit because we either see a different constant or saw |
| 1675 | // a constant before but we have a pointer with constant offset this time. |
| 1676 | return true; |
| 1677 | } |
| 1678 | |
| 1679 | if (FirstV) { |
| 1680 | // The same logic as above, but check pointer with constant offset here. |
| 1681 | if (FirstBaseAndOffset == BaseAndOffset) |
| 1682 | continue; |
| 1683 | return true; |
| 1684 | } |
| 1685 | |
| 1686 | if (C) { |
| 1687 | // This is the first time we've seen a constant; record it.
| 1688 | FirstC = C; |
| 1689 | continue; |
| 1690 | } |
| 1691 | |
| 1692 | // The remaining case is that this is the first time we've seen a pointer
| 1693 | // with a constant offset; record it.
| 1694 | FirstV = V; |
| 1695 | FirstBaseAndOffset = BaseAndOffset; |
| 1696 | } |
| 1697 | |
| 1698 | // Check if we can map phi to a constant. |
| 1699 | if (FirstC) { |
| 1700 | SimplifiedValues[&I] = FirstC; |
| 1701 | return true; |
| 1702 | } |
| 1703 | |
| 1704 | // Check if we can map phi to a pointer with constant offset. |
| 1705 | if (FirstBaseAndOffset.first) { |
| 1706 | ConstantOffsetPtrs[&I] = FirstBaseAndOffset; |
| 1707 | |
| 1708 | if (auto *SROAArg = getSROAArgForValueOrNull(V: FirstV)) |
| 1709 | SROAArgValues[&I] = SROAArg; |
| 1710 | } |
| 1711 | |
| 1712 | return true; |
| 1713 | } |
| 1714 | |
| 1715 | /// Check whether we can fold GEPs of constant-offset call site argument pointers.
| 1716 | /// This requires target data and inbounds GEPs. |
| 1717 | /// |
| 1718 | /// \return true if the specified GEP can be folded. |
| 1719 | bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { |
| 1720 | // Check if we have a base + offset for the pointer. |
| 1721 | std::pair<Value *, APInt> BaseAndOffset = |
| 1722 | ConstantOffsetPtrs.lookup(Val: I.getPointerOperand()); |
| 1723 | if (!BaseAndOffset.first) |
| 1724 | return false; |
| 1725 | |
| 1726 | // Check if the offset of this GEP is constant, and if so accumulate it |
| 1727 | // into Offset. |
| 1728 | if (!accumulateGEPOffset(GEP&: cast<GEPOperator>(Val&: I), Offset&: BaseAndOffset.second)) |
| 1729 | return false; |
| 1730 | |
| 1731 | // Add the result as a new mapping to Base + Offset. |
| 1732 | ConstantOffsetPtrs[&I] = std::move(BaseAndOffset); |
| 1733 | |
| 1734 | return true; |
| 1735 | } |
| 1736 | |
| 1737 | bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { |
| 1738 | auto *SROAArg = getSROAArgForValueOrNull(V: I.getPointerOperand()); |
| 1739 | |
| 1740 | // Lambda to check whether a GEP's indices are all constant. |
| 1741 | auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { |
| 1742 | for (const Use &Op : GEP.indices()) |
| 1743 | if (!getDirectOrSimplifiedValue<Constant>(V: Op)) |
| 1744 | return false; |
| 1745 | return true; |
| 1746 | }; |
| 1747 | |
| 1748 | if (!DisableGEPConstOperand) |
| 1749 | if (simplifyInstruction(I)) |
| 1750 | return true; |
| 1751 | |
| 1752 | if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { |
| 1753 | if (SROAArg) |
| 1754 | SROAArgValues[&I] = SROAArg; |
| 1755 | |
| 1756 | // Constant GEPs are modeled as free. |
| 1757 | return true; |
| 1758 | } |
| 1759 | |
| 1760 | // Variable GEPs will require math and will disable SROA. |
| 1761 | if (SROAArg) |
| 1762 | disableSROAForArg(SROAArg); |
| 1763 | return isGEPFree(GEP&: I); |
| 1764 | } |
| 1765 | |
| 1766 | // Simplify \p Cmp if its RHS is constant and its LHS can be value-tracked.
| 1767 | // This only handles the case where the Cmp instruction guards a recursive
| 1768 | // call and the call's arguments make the Cmp fail/succeed for the recursive call.
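|      | // For illustration (a hypothetical callee, not taken from this file):
|      | //   void f(int n) { if (n > 0) f(0); }
|      | // When analyzing the recursive call f(0), substituting the call argument 0
|      | // for n makes the guarding compare n > 0 fold to false, so the recursion is
|      | // known not to continue past this call site.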
| 1769 | bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) { |
| 1770 | // Bail out if LHS is not a function argument or RHS is NOT const: |
| 1771 | if (!isa<Argument>(Val: Cmp.getOperand(i_nocapture: 0)) || !isa<Constant>(Val: Cmp.getOperand(i_nocapture: 1))) |
| 1772 | return false; |
| 1773 | auto *CmpOp = Cmp.getOperand(i_nocapture: 0); |
| 1774 | // Make sure that the callsite is recursive: |
| 1775 | if (CandidateCall.getCaller() != &F) |
| 1776 | return false; |
| 1777 | // Only handle the case when the callsite has a single predecessor: |
| 1778 | auto *CallBB = CandidateCall.getParent(); |
| 1779 | auto *Predecessor = CallBB->getSinglePredecessor(); |
| 1780 | if (!Predecessor) |
| 1781 | return false; |
| 1782 | // Check if the callsite is guarded by the same Cmp instruction: |
| 1783 | auto *Br = dyn_cast<BranchInst>(Val: Predecessor->getTerminator()); |
| 1784 | if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp) |
| 1785 | return false; |
| 1786 | |
| 1787 | // Check whether any argument of the recursive callsite affects the cmp
| 1788 | // instruction:
| 1789 | bool ArgFound = false; |
| 1790 | Value *FuncArg = nullptr, *CallArg = nullptr; |
| 1791 | for (unsigned ArgNum = 0; |
| 1792 | ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) { |
| 1793 | FuncArg = F.getArg(i: ArgNum); |
| 1794 | CallArg = CandidateCall.getArgOperand(i: ArgNum); |
| 1795 | if (FuncArg == CmpOp && CallArg != CmpOp) { |
| 1796 | ArgFound = true; |
| 1797 | break; |
| 1798 | } |
| 1799 | } |
| 1800 | if (!ArgFound) |
| 1801 | return false; |
| 1802 | |
| 1803 | // Now we have a recursive call that is guarded by a cmp instruction. |
| 1804 | // Check if this cmp can be simplified: |
| 1805 | SimplifyQuery SQ(DL, dyn_cast<Instruction>(Val: CallArg)); |
| 1806 | CondContext CC(&Cmp); |
| 1807 | CC.Invert = (CallBB != Br->getSuccessor(i: 0)); |
| 1808 | SQ.CC = &CC; |
| 1809 | CC.AffectedValues.insert(Ptr: FuncArg); |
| 1810 | Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands( |
| 1811 | I: cast<CmpInst>(Val: &Cmp), NewOps: {CallArg, Cmp.getOperand(i_nocapture: 1)}, Q: SQ); |
| 1812 | if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(Val: SimplifiedInstruction)) { |
| 1813 | // Make sure that the BB of the recursive call is NOT the true successor |
| 1814 | // of the icmp. In other words, make sure that the recursion depth is 1. |
| 1815 | if ((ConstVal->isOne() && CC.Invert) || |
| 1816 | (ConstVal->isZero() && !CC.Invert)) { |
| 1817 | SimplifiedValues[&Cmp] = ConstVal; |
| 1818 | return true; |
| 1819 | } |
| 1820 | } |
| 1821 | return false; |
| 1822 | } |
| 1823 | |
| 1824 | /// Simplify \p I if its operands are constants and update SimplifiedValues. |
| 1825 | bool CallAnalyzer::simplifyInstruction(Instruction &I) { |
| 1826 | SmallVector<Constant *> COps; |
| 1827 | for (Value *Op : I.operands()) { |
| 1828 | Constant *COp = getDirectOrSimplifiedValue<Constant>(V: Op); |
| 1829 | if (!COp) |
| 1830 | return false; |
| 1831 | COps.push_back(Elt: COp); |
| 1832 | } |
| 1833 | auto *C = ConstantFoldInstOperands(I: &I, Ops: COps, DL); |
| 1834 | if (!C) |
| 1835 | return false; |
| 1836 | SimplifiedValues[&I] = C; |
| 1837 | return true; |
| 1838 | } |
| 1839 | |
| 1840 | /// Try to simplify a call to llvm.is.constant. |
| 1841 | /// |
| 1842 | /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since |
| 1843 | /// we expect calls of this specific intrinsic to be infrequent. |
| 1844 | /// |
| 1845 | /// FIXME: Given that we know CB's parent (F) caller |
| 1846 | /// (CandidateCall->getParent()->getParent()), we might be able to determine |
| 1847 | /// whether inlining F into F's caller would change how the call to |
| 1848 | /// llvm.is.constant would evaluate. |
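|      | /// For illustration (a sketch of the intent, not a guarantee): if the sole
|      | /// argument maps to a Constant in this inlining context, the call folds to 1;
|      | /// otherwise it folds to 0, on the assumption that later optimization of the
|      | /// inlined body would not make the value constant anyway.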
| 1849 | bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) { |
| 1850 | Value *Arg = CB.getArgOperand(i: 0); |
| 1851 | auto *C = getDirectOrSimplifiedValue<Constant>(V: Arg); |
| 1852 | |
| 1853 | Type *RT = CB.getFunctionType()->getReturnType(); |
| 1854 | SimplifiedValues[&CB] = ConstantInt::get(Ty: RT, V: C ? 1 : 0); |
| 1855 | return true; |
| 1856 | } |
| 1857 | |
| 1858 | bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) { |
| 1859 | // As per the langref, "The fourth argument to llvm.objectsize determines if |
| 1860 | // the value should be evaluated at runtime." |
| 1861 | if (cast<ConstantInt>(Val: CB.getArgOperand(i: 3))->isOne()) |
| 1862 | return false; |
| 1863 | |
| 1864 | Value *V = lowerObjectSizeCall(ObjectSize: &cast<IntrinsicInst>(Val&: CB), DL, TLI: nullptr, |
| 1865 | /*MustSucceed=*/true); |
| 1866 | Constant *C = dyn_cast_or_null<Constant>(Val: V); |
| 1867 | if (C) |
| 1868 | SimplifiedValues[&CB] = C; |
| 1869 | return C; |
| 1870 | } |
| 1871 | |
| 1872 | bool CallAnalyzer::visitBitCast(BitCastInst &I) { |
| 1873 | // Propagate constants through bitcasts. |
| 1874 | if (simplifyInstruction(I)) |
| 1875 | return true; |
| 1876 | |
| 1877 | // Track base/offsets through casts |
| 1878 | std::pair<Value *, APInt> BaseAndOffset = |
| 1879 | ConstantOffsetPtrs.lookup(Val: I.getOperand(i_nocapture: 0)); |
| 1880 | // Casts don't change the offset, just wrap it up. |
| 1881 | if (BaseAndOffset.first) |
| 1882 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1883 | |
| 1884 | // Also look for SROA candidates here. |
| 1885 | if (auto *SROAArg = getSROAArgForValueOrNull(V: I.getOperand(i_nocapture: 0))) |
| 1886 | SROAArgValues[&I] = SROAArg; |
| 1887 | |
| 1888 | // Bitcasts are always zero cost. |
| 1889 | return true; |
| 1890 | } |
| 1891 | |
| 1892 | bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { |
| 1893 | // Propagate constants through ptrtoint. |
| 1894 | if (simplifyInstruction(I)) |
| 1895 | return true; |
| 1896 | |
| 1897 | // Track base/offset pairs when converted to a plain integer provided the |
| 1898 | // integer is large enough to represent the pointer. |
| 1899 | unsigned IntegerSize = I.getType()->getScalarSizeInBits(); |
| 1900 | unsigned AS = I.getOperand(i_nocapture: 0)->getType()->getPointerAddressSpace(); |
| 1901 | if (IntegerSize == DL.getPointerSizeInBits(AS)) { |
| 1902 | std::pair<Value *, APInt> BaseAndOffset = |
| 1903 | ConstantOffsetPtrs.lookup(Val: I.getOperand(i_nocapture: 0)); |
| 1904 | if (BaseAndOffset.first) |
| 1905 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1906 | } |
| 1907 | |
| 1908 | // This is really weird. Technically, ptrtoint will disable SROA. However, |
| 1909 | // unless that ptrtoint is *used* somewhere in the live basic blocks after |
| 1910 | // inlining, it will be nuked, and SROA should proceed. All of the uses which |
| 1911 | // would block SROA would also block SROA if applied directly to a pointer, |
| 1912 | // and so we can just add the integer in here. The only places where SROA is |
| 1913 | // preserved either cannot fire on an integer, or won't in-and-of themselves |
| 1914 | // disable SROA (ext) w/o some later use that we would see and disable. |
| 1915 | if (auto *SROAArg = getSROAArgForValueOrNull(V: I.getOperand(i_nocapture: 0))) |
| 1916 | SROAArgValues[&I] = SROAArg; |
| 1917 | |
| 1918 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1919 | TargetTransformInfo::TCC_Free; |
| 1920 | } |
| 1921 | |
| 1922 | bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { |
| 1923 | // Propagate constants through inttoptr.
| 1924 | if (simplifyInstruction(I)) |
| 1925 | return true; |
| 1926 | |
| 1927 | // Track base/offset pairs when round-tripped through a pointer without |
| 1928 | // modifications provided the integer is not too large. |
| 1929 | Value *Op = I.getOperand(i_nocapture: 0); |
| 1930 | unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); |
| 1931 | if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { |
| 1932 | std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Val: Op); |
| 1933 | if (BaseAndOffset.first) |
| 1934 | ConstantOffsetPtrs[&I] = BaseAndOffset; |
| 1935 | } |
| 1936 | |
| 1937 | // "Propagate" SROA here in the same manner as we do for ptrtoint above. |
| 1938 | if (auto *SROAArg = getSROAArgForValueOrNull(V: Op)) |
| 1939 | SROAArgValues[&I] = SROAArg; |
| 1940 | |
| 1941 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1942 | TargetTransformInfo::TCC_Free; |
| 1943 | } |
| 1944 | |
| 1945 | bool CallAnalyzer::visitCastInst(CastInst &I) { |
| 1946 | // Propagate constants through casts. |
| 1947 | if (simplifyInstruction(I)) |
| 1948 | return true; |
| 1949 | |
| 1950 | // Disable SROA in the face of arbitrary casts we don't explicitly list |
| 1951 | // elsewhere. |
| 1952 | disableSROA(V: I.getOperand(i_nocapture: 0)); |
| 1953 | |
| 1954 | // If this is a floating-point cast, and the target says this operation |
| 1955 | // is expensive, this may eventually become a library call. Treat the cost |
| 1956 | // as such. |
| 1957 | switch (I.getOpcode()) { |
| 1958 | case Instruction::FPTrunc: |
| 1959 | case Instruction::FPExt: |
| 1960 | case Instruction::UIToFP: |
| 1961 | case Instruction::SIToFP: |
| 1962 | case Instruction::FPToUI: |
| 1963 | case Instruction::FPToSI: |
| 1964 | if (TTI.getFPOpCost(Ty: I.getType()) == TargetTransformInfo::TCC_Expensive) |
| 1965 | onCallPenalty(); |
| 1966 | break; |
| 1967 | default: |
| 1968 | break; |
| 1969 | } |
| 1970 | |
| 1971 | return TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 1972 | TargetTransformInfo::TCC_Free; |
| 1973 | } |
| 1974 | |
| 1975 | bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { |
| 1976 | return CandidateCall.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attr); |
| 1977 | } |
| 1978 | |
| 1979 | bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { |
| 1980 | // Does the *call site* have the NonNull attribute set on an argument? We |
| 1981 | // use the attribute on the call site to memoize any analysis done in the |
| 1982 | // caller. This will also trip if the callee function has a non-null |
| 1983 | // parameter attribute, but that's a less interesting case because hopefully |
| 1984 | // the callee would already have been simplified based on that. |
| 1985 | if (Argument *A = dyn_cast<Argument>(Val: V)) |
| 1986 | if (paramHasAttr(A, Attr: Attribute::NonNull)) |
| 1987 | return true; |
| 1988 | |
| 1989 | // Is this an alloca in the caller? This is distinct from the attribute case |
| 1990 | // above because attributes aren't updated within the inliner itself and we |
| 1991 | // always want to catch the alloca derived case. |
| 1992 | if (isAllocaDerivedArg(V)) |
| 1993 | // We can actually predict the result of comparisons between an |
| 1994 | // alloca-derived value and null. Note that this fires regardless of |
| 1995 | // SROA firing. |
| 1996 | return true; |
| 1997 | |
| 1998 | return false; |
| 1999 | } |
| 2000 | |
| 2001 | bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { |
| 2002 | // If the normal destination of the invoke or the parent block of the call |
| 2003 | // site is unreachable-terminated, there is little point in inlining this |
| 2004 | // unless there is literally zero cost. |
| 2005 | // FIXME: Note that it is possible that an unreachable-terminated block has a |
| 2006 | // hot entry. For example, in the scenario below, inlining hot_call_X() may
| 2007 | // be beneficial:
| 2008 | // main() { |
| 2009 | // hot_call_1(); |
| 2010 | // ... |
| 2011 | // hot_call_N() |
| 2012 | // exit(0); |
| 2013 | // } |
| 2014 | // For now, we are not handling this corner case here as it is rare in real |
| 2015 | // code. In future, we should elaborate this based on BPI and BFI in more |
| 2016 | // general threshold adjusting heuristics in updateThreshold(). |
| 2017 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) { |
| 2018 | if (isa<UnreachableInst>(Val: II->getNormalDest()->getTerminator())) |
| 2019 | return false; |
| 2020 | } else if (isa<UnreachableInst>(Val: Call.getParent()->getTerminator())) |
| 2021 | return false; |
| 2022 | |
| 2023 | return true; |
| 2024 | } |
| 2025 | |
| 2026 | bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, |
| 2027 | BlockFrequencyInfo *CallerBFI) { |
| 2028 | // If global profile summary is available, then callsite's coldness is |
| 2029 | // determined based on that. |
| 2030 | if (PSI && PSI->hasProfileSummary()) |
| 2031 | return PSI->isColdCallSite(CB: Call, BFI: CallerBFI); |
| 2032 | |
| 2033 | // Otherwise we need BFI to be available. |
| 2034 | if (!CallerBFI) |
| 2035 | return false; |
| 2036 | |
| 2037 | // Determine if the callsite is cold relative to caller's entry. We could |
| 2038 | // potentially cache the computation of scaled entry frequency, but the added |
| 2039 | // complexity is not worth it unless this scaling shows up high in the |
| 2040 | // profiles. |
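|      | // For example (illustrative numbers; the cutoff comes from the
|      | // ColdCallSiteRelFreq option): with a relative frequency of, say, 2, a
|      | // callsite executed less than 2% as often as the caller's entry block is
|      | // treated as cold.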
| 2041 | const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); |
| 2042 | auto CallSiteBB = Call.getParent(); |
| 2043 | auto CallSiteFreq = CallerBFI->getBlockFreq(BB: CallSiteBB); |
| 2044 | auto CallerEntryFreq = |
| 2045 | CallerBFI->getBlockFreq(BB: &(Call.getCaller()->getEntryBlock())); |
| 2046 | return CallSiteFreq < CallerEntryFreq * ColdProb; |
| 2047 | } |
| 2048 | |
| 2049 | std::optional<int> |
| 2050 | InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, |
| 2051 | BlockFrequencyInfo *CallerBFI) { |
| 2052 | |
| 2053 | // If global profile summary is available, then callsite's hotness is |
| 2054 | // determined based on that. |
| 2055 | if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CB: Call, BFI: CallerBFI)) |
| 2056 | return Params.HotCallSiteThreshold; |
| 2057 | |
| 2058 | // Otherwise we need BFI to be available and to have a locally hot callsite |
| 2059 | // threshold. |
| 2060 | if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) |
| 2061 | return std::nullopt; |
| 2062 | |
| 2063 | // Determine if the callsite is hot relative to caller's entry. We could |
| 2064 | // potentially cache the computation of scaled entry frequency, but the added |
| 2065 | // complexity is not worth it unless this scaling shows up high in the |
| 2066 | // profiles. |
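|      | // For example (illustrative numbers; the cutoff comes from the
|      | // HotCallSiteRelFreq option): with a relative frequency of, say, 60, a
|      | // callsite whose block frequency is at least 60x the caller's entry
|      | // frequency qualifies for the locally hot threshold.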
| 2067 | const BasicBlock *CallSiteBB = Call.getParent(); |
| 2068 | BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(BB: CallSiteBB); |
| 2069 | BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq(); |
| 2070 | std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(Factor: HotCallSiteRelFreq); |
| 2071 | if (Limit && CallSiteFreq >= *Limit) |
| 2072 | return Params.LocallyHotCallSiteThreshold; |
| 2073 | |
| 2074 | // Otherwise treat it normally. |
| 2075 | return std::nullopt; |
| 2076 | } |
| 2077 | |
| 2078 | void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { |
| 2079 | // If no size growth is allowed for this inlining, set Threshold to 0. |
| 2080 | if (!allowSizeGrowth(Call)) { |
| 2081 | Threshold = 0; |
| 2082 | return; |
| 2083 | } |
| 2084 | |
| 2085 | Function *Caller = Call.getCaller(); |
| 2086 | |
| 2087 | // return min(A, B) if B is valid. |
| 2088 | auto MinIfValid = [](int A, std::optional<int> B) { |
| 2089 | return B ? std::min(a: A, b: *B) : A; |
| 2090 | }; |
| 2091 | |
| 2092 | // return max(A, B) if B is valid. |
| 2093 | auto MaxIfValid = [](int A, std::optional<int> B) { |
| 2094 | return B ? std::max(a: A, b: *B) : A; |
| 2095 | }; |
| 2096 | |
| 2097 | // Various bonus percentages. These are multiplied by Threshold to get the |
| 2098 | // bonus values. |
| 2099 | // SingleBBBonus: This bonus is applied if the callee has a single reachable |
| 2100 | // basic block at the given callsite context. This is speculatively applied |
| 2101 | // and withdrawn if more than one basic block is seen. |
| 2102 | // |
| 2103 | // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
| 2104 | // of the last call to a static function as inlining such functions is |
| 2105 | // guaranteed to reduce code size. |
| 2106 | // |
| 2107 | // These bonus percentages may be set to 0 based on properties of the caller |
| 2108 | // and the callsite. |
| 2109 | int SingleBBBonusPercent = 50; |
| 2110 | int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); |
| 2111 | int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus(); |
| 2112 | |
| 2113 | // Lambda to set all the above bonus and bonus percentages to 0. |
| 2114 | auto DisallowAllBonuses = [&]() { |
| 2115 | SingleBBBonusPercent = 0; |
| 2116 | VectorBonusPercent = 0; |
| 2117 | LastCallToStaticBonus = 0; |
| 2118 | }; |
| 2119 | |
| 2120 | // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available |
| 2121 | // and reduce the threshold if the caller has the necessary attribute. |
| 2122 | if (Caller->hasMinSize()) { |
| 2123 | Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); |
| 2124 | // For minsize, we want to disable the single BB bonus and the vector |
| 2125 | // bonuses, but not the last-call-to-static bonus. Inlining the last call to |
| 2126 | // a static function will, at the minimum, eliminate the parameter setup and |
| 2127 | // call/return instructions. |
| 2128 | SingleBBBonusPercent = 0; |
| 2129 | VectorBonusPercent = 0; |
| 2130 | } else if (Caller->hasOptSize()) |
| 2131 | Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); |
| 2132 | |
| 2133 | // Adjust the threshold based on inlinehint attribute and profile based |
| 2134 | // hotness information if the caller does not have MinSize attribute. |
| 2135 | if (!Caller->hasMinSize()) { |
| 2136 | if (Callee.hasFnAttribute(Kind: Attribute::InlineHint)) |
| 2137 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); |
| 2138 | |
| 2139 | // FIXME: After switching to the new passmanager, simplify the logic below |
| 2140 | // by checking only the callsite hotness/coldness as we will reliably |
| 2141 | // have local profile information. |
| 2142 | // |
| 2143 | // Callsite hotness and coldness can be determined if sample profile is |
| 2144 | // used (which adds hotness metadata to calls) or if caller's |
| 2145 | // BlockFrequencyInfo is available. |
| 2146 | BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr; |
| 2147 | auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); |
| 2148 | if (!Caller->hasOptSize() && HotCallSiteThreshold) { |
| 2149 | LLVM_DEBUG(dbgs() << "Hot callsite.\n" ); |
| 2150 | // FIXME: This should update the threshold only if it exceeds the |
| 2151 | // current threshold, but AutoFDO + ThinLTO currently relies on this |
| 2152 | // behavior to prevent inlining of hot callsites during ThinLTO |
| 2153 | // compile phase. |
| 2154 | Threshold = *HotCallSiteThreshold; |
| 2155 | } else if (isColdCallSite(Call, CallerBFI)) { |
| 2156 | LLVM_DEBUG(dbgs() << "Cold callsite.\n" ); |
| 2157 | // Do not apply bonuses for a cold callsite including the |
| 2158 | // LastCallToStatic bonus. While this bonus might result in code size |
| 2159 | // reduction, it can cause the size of a non-cold caller to increase |
| 2160 | // preventing it from being inlined. |
| 2161 | DisallowAllBonuses(); |
| 2162 | Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); |
| 2163 | } else if (PSI) { |
| 2164 | // Use callee's global profile information only if we have no way of |
| 2165 | // determining this via callsite information. |
| 2166 | if (PSI->isFunctionEntryHot(F: &Callee)) { |
| 2167 | LLVM_DEBUG(dbgs() << "Hot callee.\n" ); |
| 2168 | // If callsite hotness can not be determined, we may still know |
| 2169 | // that the callee is hot and treat it as a weaker hint for threshold |
| 2170 | // increase. |
| 2171 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); |
| 2172 | } else if (PSI->isFunctionEntryCold(F: &Callee)) { |
| 2173 | LLVM_DEBUG(dbgs() << "Cold callee.\n" ); |
| 2174 | // Do not apply bonuses for a cold callee including the |
| 2175 | // LastCallToStatic bonus. While this bonus might result in code size |
| 2176 | // reduction, it can cause the size of a non-cold caller to increase |
| 2177 | // preventing it from being inlined. |
| 2178 | DisallowAllBonuses(); |
| 2179 | Threshold = MinIfValid(Threshold, Params.ColdThreshold); |
| 2180 | } |
| 2181 | } |
| 2182 | } |
| 2183 | |
| 2184 | Threshold += TTI.adjustInliningThreshold(CB: &Call); |
| 2185 | |
| 2186 | // Finally, take the target-specific inlining threshold multiplier into |
| 2187 | // account. |
| 2188 | Threshold *= TTI.getInliningThresholdMultiplier(); |
| 2189 | |
| 2190 | SingleBBBonus = Threshold * SingleBBBonusPercent / 100; |
| 2191 | VectorBonus = Threshold * VectorBonusPercent / 100; |
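|      | // For example (illustrative numbers only): with Threshold = 225, a single-BB
|      | // bonus of 50% and a vector bonus of, say, 150%, SingleBBBonus = 112 and
|      | // VectorBonus = 337 after integer division.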
| 2192 | |
| 2193 | // If there is only one call of the function, and it has internal linkage, |
| 2194 | // the cost of inlining it drops dramatically. It may seem odd to update |
| 2195 | // Cost in updateThreshold, but the bonus depends on the logic in this method. |
| 2196 | if (isSoleCallToLocalFunction(CB: Call, Callee: F)) { |
| 2197 | addCost(Inc: -LastCallToStaticBonus); |
| 2198 | StaticBonusApplied = LastCallToStaticBonus; |
| 2199 | } |
| 2200 | } |
| 2201 | |
| 2202 | bool CallAnalyzer::visitCmpInst(CmpInst &I) { |
| 2203 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2204 | // First try to handle simplified comparisons. |
| 2205 | if (simplifyInstruction(I)) |
| 2206 | return true; |
| 2207 | |
| 2208 | // Try to handle comparison that can be simplified using ValueTracking. |
| 2209 | if (simplifyCmpInstForRecCall(Cmp&: I)) |
| 2210 | return true; |
| 2211 | |
| 2212 | if (I.getOpcode() == Instruction::FCmp) |
| 2213 | return false; |
| 2214 | |
| 2215 | // Otherwise look for a comparison between constant offset pointers with |
| 2216 | // a common base. |
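|      | // For example (illustrative): if LHS is known to be (%base + 8) and RHS is
|      | // (%base + 4), an unsigned greater-than compare folds to true here at no
|      | // cost.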
| 2217 | Value *LHSBase, *RHSBase; |
| 2218 | APInt LHSOffset, RHSOffset; |
| 2219 | std::tie(args&: LHSBase, args&: LHSOffset) = ConstantOffsetPtrs.lookup(Val: LHS); |
| 2220 | if (LHSBase) { |
| 2221 | std::tie(args&: RHSBase, args&: RHSOffset) = ConstantOffsetPtrs.lookup(Val: RHS); |
| 2222 | if (RHSBase && LHSBase == RHSBase) { |
| 2223 | // We have common bases, fold the icmp to a constant based on the |
| 2224 | // offsets. |
| 2225 | SimplifiedValues[&I] = ConstantInt::getBool( |
| 2226 | Ty: I.getType(), |
| 2227 | V: ICmpInst::compare(LHS: LHSOffset, RHS: RHSOffset, Pred: I.getPredicate())); |
| 2228 | ++NumConstantPtrCmps; |
| 2229 | return true; |
| 2230 | } |
| 2231 | } |
| 2232 | |
| 2233 | auto isImplicitNullCheckCmp = [](const CmpInst &I) { |
| 2234 | for (auto *User : I.users()) |
| 2235 | if (auto *Instr = dyn_cast<Instruction>(Val: User)) |
| 2236 | if (!Instr->getMetadata(KindID: LLVMContext::MD_make_implicit)) |
| 2237 | return false; |
| 2238 | return true; |
| 2239 | }; |
| 2240 | |
| 2241 | // If the comparison is an equality comparison with null, we can simplify it |
| 2242 | // if we know the value (argument) can't be null |
| 2243 | if (I.isEquality() && isa<ConstantPointerNull>(Val: I.getOperand(i_nocapture: 1))) { |
| 2244 | if (isKnownNonNullInCallee(V: I.getOperand(i_nocapture: 0))) { |
| 2245 | bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; |
| 2246 | SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(Ty: I.getType()) |
| 2247 | : ConstantInt::getFalse(Ty: I.getType()); |
| 2248 | return true; |
| 2249 | } |
| 2250 | // Implicit null checks act as unconditional branches and their comparisons |
| 2251 | // should be treated as simplified and free of cost. |
| 2252 | if (isImplicitNullCheckCmp(I)) |
| 2253 | return true; |
| 2254 | } |
| 2255 | return handleSROA(V: I.getOperand(i_nocapture: 0), DoNotDisable: isa<ConstantPointerNull>(Val: I.getOperand(i_nocapture: 1))); |
| 2256 | } |
| 2257 | |
| 2258 | bool CallAnalyzer::visitSub(BinaryOperator &I) { |
| 2259 | // Try to handle a special case: we can fold computing the difference of two |
| 2260 | // constant-related pointers. |
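|      | // For example (illustrative): if LHS is known to be (%base + 16) and RHS is
|      | // (%base + 4) for the same %base, the subtract folds to the constant 12.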
| 2261 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2262 | Value *LHSBase, *RHSBase; |
| 2263 | APInt LHSOffset, RHSOffset; |
| 2264 | std::tie(args&: LHSBase, args&: LHSOffset) = ConstantOffsetPtrs.lookup(Val: LHS); |
| 2265 | if (LHSBase) { |
| 2266 | std::tie(args&: RHSBase, args&: RHSOffset) = ConstantOffsetPtrs.lookup(Val: RHS); |
| 2267 | if (RHSBase && LHSBase == RHSBase) { |
| 2268 | // We have common bases, fold the subtract to a constant based on the |
| 2269 | // offsets. |
| 2270 | Constant *CLHS = ConstantInt::get(Context&: LHS->getContext(), V: LHSOffset); |
| 2271 | Constant *CRHS = ConstantInt::get(Context&: RHS->getContext(), V: RHSOffset); |
| 2272 | if (Constant *C = ConstantExpr::getSub(C1: CLHS, C2: CRHS)) { |
| 2273 | SimplifiedValues[&I] = C; |
| 2274 | ++NumConstantPtrDiffs; |
| 2275 | return true; |
| 2276 | } |
| 2277 | } |
| 2278 | } |
| 2279 | |
| 2280 | // Otherwise, fall back to the generic logic for simplifying and handling |
| 2281 | // instructions. |
| 2282 | return Base::visitSub(I); |
| 2283 | } |
| 2284 | |
| 2285 | bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { |
| 2286 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
| 2287 | Constant *CLHS = getDirectOrSimplifiedValue<Constant>(V: LHS); |
| 2288 | Constant *CRHS = getDirectOrSimplifiedValue<Constant>(V: RHS); |
| 2289 | |
| 2290 | Value *SimpleV = nullptr; |
| 2291 | if (auto FI = dyn_cast<FPMathOperator>(Val: &I)) |
| 2292 | SimpleV = simplifyBinOp(Opcode: I.getOpcode(), LHS: CLHS ? CLHS : LHS, RHS: CRHS ? CRHS : RHS, |
| 2293 | FMF: FI->getFastMathFlags(), Q: DL); |
| 2294 | else |
| 2295 | SimpleV = |
| 2296 | simplifyBinOp(Opcode: I.getOpcode(), LHS: CLHS ? CLHS : LHS, RHS: CRHS ? CRHS : RHS, Q: DL); |
| 2297 | |
| 2298 | if (Constant *C = dyn_cast_or_null<Constant>(Val: SimpleV)) |
| 2299 | SimplifiedValues[&I] = C; |
| 2300 | |
| 2301 | if (SimpleV) |
| 2302 | return true; |
| 2303 | |
| 2304 | // Disable any SROA on arguments to arbitrary, unsimplified binary operators. |
| 2305 | disableSROA(V: LHS); |
| 2306 | disableSROA(V: RHS); |
| 2307 | |
| 2308 | // If the instruction is floating point, and the target says this operation |
| 2309 | // is expensive, this may eventually become a library call. Treat the cost |
| 2310 | // as such, unless it's fneg, which can be implemented with an xor.
| 2311 | using namespace llvm::PatternMatch; |
| 2312 | if (I.getType()->isFloatingPointTy() && |
| 2313 | TTI.getFPOpCost(Ty: I.getType()) == TargetTransformInfo::TCC_Expensive && |
| 2314 | !match(V: &I, P: m_FNeg(X: m_Value()))) |
| 2315 | onCallPenalty(); |
| 2316 | |
| 2317 | return false; |
| 2318 | } |
| 2319 | |
| 2320 | bool CallAnalyzer::visitFNeg(UnaryOperator &I) { |
| 2321 | Value *Op = I.getOperand(i_nocapture: 0); |
| 2322 | Constant *COp = getDirectOrSimplifiedValue<Constant>(V: Op); |
| 2323 | |
| 2324 | Value *SimpleV = simplifyFNegInst( |
| 2325 | Op: COp ? COp : Op, FMF: cast<FPMathOperator>(Val&: I).getFastMathFlags(), Q: DL); |
| 2326 | |
| 2327 | if (Constant *C = dyn_cast_or_null<Constant>(Val: SimpleV)) |
| 2328 | SimplifiedValues[&I] = C; |
| 2329 | |
| 2330 | if (SimpleV) |
| 2331 | return true; |
| 2332 | |
| 2333 | // Disable any SROA on arguments to arbitrary, unsimplified fneg. |
| 2334 | disableSROA(V: Op); |
| 2335 | |
| 2336 | return false; |
| 2337 | } |
| 2338 | |
| 2339 | bool CallAnalyzer::visitLoad(LoadInst &I) { |
| 2340 | if (handleSROA(V: I.getPointerOperand(), DoNotDisable: I.isSimple())) |
| 2341 | return true; |
| 2342 | |
| 2343 | // If the data is already loaded from this address and hasn't been clobbered |
| 2344 | // by any stores or calls, this load is likely to be redundant and can be |
| 2345 | // eliminated. |
| 2346 | if (EnableLoadElimination && |
| 2347 | !LoadAddrSet.insert(Ptr: I.getPointerOperand()).second && I.isUnordered()) { |
| 2348 | onLoadEliminationOpportunity(); |
| 2349 | return true; |
| 2350 | } |
| 2351 | |
| 2352 | onMemAccess(); |
| 2353 | return false; |
| 2354 | } |
| 2355 | |
| 2356 | bool CallAnalyzer::visitStore(StoreInst &I) { |
| 2357 | if (handleSROA(V: I.getPointerOperand(), DoNotDisable: I.isSimple())) |
| 2358 | return true; |
| 2359 | |
| 2360 | // The store can potentially clobber loads and prevent repeated loads from |
| 2361 | // being eliminated. |
| 2362 | // FIXME: |
| 2363 | // 1. We can probably keep an initial set of eliminable loads subtracted
| 2364 | // from the cost even when we finally see a store. We just need to disable |
| 2365 | // *further* accumulation of elimination savings. |
| 2366 | // 2. We should probably at some point thread MemorySSA for the callee into |
| 2367 | // this and then use that to actually compute *really* precise savings. |
| 2368 | disableLoadElimination(); |
| 2369 | |
| 2370 | onMemAccess(); |
| 2371 | return false; |
| 2372 | } |
| 2373 | |
| 2374 | bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
| 2375 | Value *Op = I.getAggregateOperand(); |
| 2376 | |
| 2377 | // Special handling, because we want to simplify extractvalue with a |
| 2378 | // potential insertvalue from the caller. |
| 2379 | if (Value *SimpleOp = getSimplifiedValueUnchecked(V: Op)) { |
| 2380 | SimplifyQuery SQ(DL); |
| 2381 | Value *SimpleV = simplifyExtractValueInst(Agg: SimpleOp, Idxs: I.getIndices(), Q: SQ); |
| 2382 | if (SimpleV) { |
| 2383 | SimplifiedValues[&I] = SimpleV; |
| 2384 | return true; |
| 2385 | } |
| 2386 | } |
| 2387 | |
| 2388 | // SROA can't look through these, but they may be free. |
| 2389 | return Base::visitExtractValue(I); |
| 2390 | } |
| 2391 | |
| 2392 | bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { |
| 2393 | // Constant folding for insert value is trivial. |
| 2394 | if (simplifyInstruction(I)) |
| 2395 | return true; |
| 2396 | |
| 2397 | // SROA can't look through these, but they may be free. |
| 2398 | return Base::visitInsertValue(I); |
| 2399 | } |
| 2400 | |
| 2401 | /// Try to simplify a call site. |
| 2402 | /// |
| 2403 | /// Takes a concrete function and callsite and tries to actually simplify it by |
| 2404 | /// analyzing the arguments and call itself with instsimplify. Returns true if |
| 2405 | /// it has simplified the callsite to some other entity (a constant), making it |
| 2406 | /// free. |
| 2407 | bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { |
| 2408 | // FIXME: Using the instsimplify logic directly for this is inefficient |
| 2409 | // because we have to continually rebuild the argument list even when no |
| 2410 | // simplifications can be performed. Until that is fixed with remapping |
| 2411 | // inside of instsimplify, directly constant fold calls here. |
| 2412 | if (!canConstantFoldCallTo(Call: &Call, F)) |
| 2413 | return false; |
| 2414 | |
| 2415 | // Try to re-map the arguments to constants. |
| 2416 | SmallVector<Constant *, 4> ConstantArgs; |
| 2417 | ConstantArgs.reserve(N: Call.arg_size()); |
| 2418 | for (Value *I : Call.args()) { |
| 2419 | Constant *C = getDirectOrSimplifiedValue<Constant>(V: I); |
| 2420 | if (!C) |
| 2421 | return false; // This argument doesn't map to a constant. |
| 2422 | |
| 2423 | ConstantArgs.push_back(Elt: C); |
| 2424 | } |
| 2425 | if (Constant *C = ConstantFoldCall(Call: &Call, F, Operands: ConstantArgs)) { |
| 2426 | SimplifiedValues[&Call] = C; |
| 2427 | return true; |
| 2428 | } |
| 2429 | |
| 2430 | return false; |
| 2431 | } |
| 2432 | |
| 2433 | bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) { |
| 2434 | const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr; |
| 2435 | LibFunc LF; |
| 2436 | if (!TLI || !TLI->getLibFunc(FDecl: *F, F&: LF) || !TLI->has(F: LF)) |
| 2437 | return TTI.isLoweredToCall(F); |
| 2438 | |
| 2439 | switch (LF) { |
| 2440 | case LibFunc_memcpy_chk: |
| 2441 | case LibFunc_memmove_chk: |
| 2442 | case LibFunc_mempcpy_chk: |
| 2443 | case LibFunc_memset_chk: { |
| 2444 | // Calls to __memcpy_chk whose length is known to fit within the object |
| 2445 | // size will eventually be replaced by inline stores. Therefore, these |
| 2446 | // should not incur a call penalty. This is only really relevant on |
| 2447 | // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as |
| 2448 | // other platforms use memcpy intrinsics, which are already exempt from the |
| 2449 | // call penalty. |
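|  |     // For illustration only (hypothetical source): a fortified call such as |
|  |     // __memcpy_chk(dst, src, /*len=*/16, /*objsize=*/64), where both the |
|  |     // length and object size are constants and 16 <= 64, is expected to |
|  |     // become inline stores and therefore gets no call penalty. |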
| 2450 | auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(V: Call.getOperand(i_nocapture: 2)); |
| 2451 | auto *ObjSizeOp = |
| 2452 | getDirectOrSimplifiedValue<ConstantInt>(V: Call.getOperand(i_nocapture: 3)); |
| 2453 | if (LenOp && ObjSizeOp && |
| 2454 | LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) { |
| 2455 | return false; |
| 2456 | } |
| 2457 | break; |
| 2458 | } |
| 2459 | default: |
| 2460 | break; |
| 2461 | } |
| 2462 | |
| 2463 | return TTI.isLoweredToCall(F); |
| 2464 | } |
| 2465 | |
| 2466 | bool CallAnalyzer::visitCallBase(CallBase &Call) { |
| 2467 | if (!onCallBaseVisitStart(Call)) |
| 2468 | return true; |
| 2469 | |
| 2470 | if (Call.hasFnAttr(Kind: Attribute::ReturnsTwice) && |
| 2471 | !F.hasFnAttribute(Kind: Attribute::ReturnsTwice)) { |
| 2472 | // This aborts the entire analysis. |
| 2473 | ExposesReturnsTwice = true; |
| 2474 | return false; |
| 2475 | } |
| 2476 | if (isa<CallInst>(Val: Call) && cast<CallInst>(Val&: Call).cannotDuplicate()) |
| 2477 | ContainsNoDuplicateCall = true; |
| 2478 | |
| 2479 | if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Val: Call.getCalledOperand())) |
| 2480 | onInlineAsm(Arg: *InlineAsmOp); |
| 2481 | |
| 2482 | Function *F = Call.getCalledFunction(); |
| 2483 | bool IsIndirectCall = !F; |
| 2484 | if (IsIndirectCall) { |
| 2485 | // Check if this happens to be an indirect function call to a known function |
| 2486 | // in this inline context. If not, we've done all we can. |
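|  |     // For illustration: a callee that invokes a function-pointer parameter |
|  |     // can be analyzed as a direct call here when this particular caller |
|  |     // passes a known function constant for that parameter. |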
| 2487 | Value *Callee = Call.getCalledOperand(); |
| 2488 | F = getSimplifiedValue<Function>(V: Callee); |
| 2489 | if (!F || F->getFunctionType() != Call.getFunctionType()) { |
| 2490 | onCallArgumentSetup(Call); |
| 2491 | |
| 2492 | if (!Call.onlyReadsMemory()) |
| 2493 | disableLoadElimination(); |
| 2494 | return Base::visitCallBase(I&: Call); |
| 2495 | } |
| 2496 | } |
| 2497 | |
| 2498 | assert(F && "Expected a call to a known function" ); |
| 2499 | |
| 2500 | // When we have a concrete function, first try to simplify it directly. |
| 2501 | if (simplifyCallSite(F, Call)) |
| 2502 | return true; |
| 2503 | |
| 2504 | // Next check if it is an intrinsic we know about. |
| 2505 | // FIXME: Lift this into part of the InstVisitor. |
| 2506 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &Call)) { |
| 2507 | switch (II->getIntrinsicID()) { |
| 2508 | default: |
| 2509 | if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(I: II)) |
| 2510 | disableLoadElimination(); |
| 2511 | return Base::visitCallBase(I&: Call); |
| 2512 | |
| 2513 | case Intrinsic::load_relative: |
| 2514 | onLoadRelativeIntrinsic(); |
| 2515 | return false; |
| 2516 | |
| 2517 | case Intrinsic::memset: |
| 2518 | case Intrinsic::memcpy: |
| 2519 | case Intrinsic::memmove: |
| 2520 | disableLoadElimination(); |
| 2521 | // SROA can usually chew through these intrinsics, but they aren't free. |
| 2522 | return false; |
| 2523 | case Intrinsic::icall_branch_funnel: |
| 2524 | case Intrinsic::localescape: |
| 2525 | HasUninlineableIntrinsic = true; |
| 2526 | return false; |
| 2527 | case Intrinsic::vastart: |
| 2528 | InitsVargArgs = true; |
| 2529 | return false; |
| 2530 | case Intrinsic::launder_invariant_group: |
| 2531 | case Intrinsic::strip_invariant_group: |
| 2532 | if (auto *SROAArg = getSROAArgForValueOrNull(V: II->getOperand(i_nocapture: 0))) |
| 2533 | SROAArgValues[II] = SROAArg; |
| 2534 | return true; |
| 2535 | case Intrinsic::is_constant: |
| 2536 | return simplifyIntrinsicCallIsConstant(CB&: Call); |
| 2537 | case Intrinsic::objectsize: |
| 2538 | return simplifyIntrinsicCallObjectSize(CB&: Call); |
| 2539 | } |
| 2540 | } |
| 2541 | |
| 2542 | if (F == Call.getFunction()) { |
| 2543 | // This flag will fully abort the analysis, so don't bother with anything |
| 2544 | // else. |
| 2545 | IsRecursiveCall = true; |
| 2546 | if (!AllowRecursiveCall) |
| 2547 | return false; |
| 2548 | } |
| 2549 | |
| 2550 | if (isLoweredToCall(F, Call)) { |
| 2551 | onLoweredCall(F, Call, IsIndirectCall); |
| 2552 | } |
| 2553 | |
| 2554 | if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) |
| 2555 | disableLoadElimination(); |
| 2556 | return Base::visitCallBase(I&: Call); |
| 2557 | } |
| 2558 | |
| 2559 | bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { |
| 2560 | // At least one return instruction will be free after inlining. |
| 2561 | bool Free = !HasReturn; |
| 2562 | HasReturn = true; |
| 2563 | return Free; |
| 2564 | } |
| 2565 | |
| 2566 | bool CallAnalyzer::visitBranchInst(BranchInst &BI) { |
| 2567 | // We model unconditional branches as essentially free -- they really |
| 2568 | // shouldn't exist at all, but handling them makes the behavior of the |
| 2569 | // inliner more regular and predictable. Interestingly, conditional branches |
| 2570 | // which will fold away are also free. |
| 2571 | return BI.isUnconditional() || |
| 2572 | getDirectOrSimplifiedValue<ConstantInt>(V: BI.getCondition()) || |
| 2573 | BI.getMetadata(KindID: LLVMContext::MD_make_implicit); |
| 2574 | } |
| 2575 | |
| 2576 | bool CallAnalyzer::visitSelectInst(SelectInst &SI) { |
| 2577 | bool CheckSROA = SI.getType()->isPointerTy(); |
| 2578 | Value *TrueVal = SI.getTrueValue(); |
| 2579 | Value *FalseVal = SI.getFalseValue(); |
| 2580 | |
| 2581 | Constant *TrueC = getDirectOrSimplifiedValue<Constant>(V: TrueVal); |
| 2582 | Constant *FalseC = getDirectOrSimplifiedValue<Constant>(V: FalseVal); |
| 2583 | Constant *CondC = getSimplifiedValue<Constant>(V: SI.getCondition()); |
| 2584 | |
| 2585 | if (!CondC) { |
| 2586 | // Select C, X, X => X |
| 2587 | if (TrueC == FalseC && TrueC) { |
| 2588 | SimplifiedValues[&SI] = TrueC; |
| 2589 | return true; |
| 2590 | } |
| 2591 | |
| 2592 | if (!CheckSROA) |
| 2593 | return Base::visitSelectInst(I&: SI); |
| 2594 | |
| 2595 | std::pair<Value *, APInt> TrueBaseAndOffset = |
| 2596 | ConstantOffsetPtrs.lookup(Val: TrueVal); |
| 2597 | std::pair<Value *, APInt> FalseBaseAndOffset = |
| 2598 | ConstantOffsetPtrs.lookup(Val: FalseVal); |
| 2599 | if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { |
| 2600 | ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; |
| 2601 | |
| 2602 | if (auto *SROAArg = getSROAArgForValueOrNull(V: TrueVal)) |
| 2603 | SROAArgValues[&SI] = SROAArg; |
| 2604 | return true; |
| 2605 | } |
| 2606 | |
| 2607 | return Base::visitSelectInst(I&: SI); |
| 2608 | } |
| 2609 | |
| 2610 | // Select condition is a constant. |
| 2611 | Value *SelectedV = CondC->isAllOnesValue() ? TrueVal |
| 2612 | : (CondC->isNullValue()) ? FalseVal |
| 2613 | : nullptr; |
| 2614 | if (!SelectedV) { |
| 2615 |     // Condition is a vector constant that is not all 1s or all 0s. If all |
| 2616 |     // operands are constants, ConstantFoldSelectInstruction() can handle |
| 2617 |     // cases such as vector selects. |
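|  |     // For example (illustrative), a condition such as <i1 true, i1 false> |
|  |     // selects lanes from both operands, so folding is only possible when |
|  |     // both TrueVal and FalseVal are themselves constants. |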
| 2618 | if (TrueC && FalseC) { |
| 2619 | if (auto *C = ConstantFoldSelectInstruction(Cond: CondC, V1: TrueC, V2: FalseC)) { |
| 2620 | SimplifiedValues[&SI] = C; |
| 2621 | return true; |
| 2622 | } |
| 2623 | } |
| 2624 | return Base::visitSelectInst(I&: SI); |
| 2625 | } |
| 2626 | |
| 2627 | // Condition is either all 1s or all 0s. SI can be simplified. |
| 2628 | if (Constant *SelectedC = dyn_cast<Constant>(Val: SelectedV)) { |
| 2629 | SimplifiedValues[&SI] = SelectedC; |
| 2630 | return true; |
| 2631 | } |
| 2632 | |
| 2633 | if (!CheckSROA) |
| 2634 | return true; |
| 2635 | |
| 2636 | std::pair<Value *, APInt> BaseAndOffset = |
| 2637 | ConstantOffsetPtrs.lookup(Val: SelectedV); |
| 2638 | if (BaseAndOffset.first) { |
| 2639 | ConstantOffsetPtrs[&SI] = BaseAndOffset; |
| 2640 | |
| 2641 | if (auto *SROAArg = getSROAArgForValueOrNull(V: SelectedV)) |
| 2642 | SROAArgValues[&SI] = SROAArg; |
| 2643 | } |
| 2644 | |
| 2645 | return true; |
| 2646 | } |
| 2647 | |
| 2648 | bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { |
| 2649 | // We model unconditional switches as free, see the comments on handling |
| 2650 | // branches. |
| 2651 | if (getDirectOrSimplifiedValue<ConstantInt>(V: SI.getCondition())) |
| 2652 | return true; |
| 2653 | |
| 2654 | // Assume the most general case where the switch is lowered into |
| 2655 | // either a jump table, bit test, or a balanced binary tree consisting of |
| 2656 | // case clusters without merging adjacent clusters with the same |
| 2657 | // destination. We do not consider the switches that are lowered with a mix |
| 2658 | // of jump table/bit test/binary search tree. The cost of the switch is |
| 2659 | // proportional to the size of the tree or the size of jump table range. |
| 2660 | // |
| 2661 | // NB: We convert large switches which are just used to initialize large phi |
| 2662 | // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent |
| 2663 | // inlining those. It will prevent inlining in cases where the optimization |
| 2664 | // does not (yet) fire. |
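|  |   // For illustration: a switch lowered as a jump table over a 0..255 case |
|  |   // range is charged in proportion to that 256-entry range, while one |
|  |   // lowered as a balanced binary search tree is charged in proportion to |
|  |   // its number of case clusters. |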
| 2665 | |
| 2666 | unsigned JumpTableSize = 0; |
| 2667 | BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; |
| 2668 | unsigned NumCaseCluster = |
| 2669 | TTI.getEstimatedNumberOfCaseClusters(SI, JTSize&: JumpTableSize, PSI, BFI); |
| 2670 | |
| 2671 | onFinalizeSwitch(JumpTableSize, NumCaseCluster, DefaultDestUnreachable: SI.defaultDestUnreachable()); |
| 2672 | return false; |
| 2673 | } |
| 2674 | |
| 2675 | bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { |
| 2676 |   // We never want to inline functions that contain an indirectbr. Inlining |
| 2677 |   // would be incorrect because all the blockaddresses (in static global |
| 2678 |   // initializers, for example) would still refer to the original function, |
| 2679 |   // and the indirect jump would jump from the inlined copy of the function |
| 2680 |   // into the original function, which is undefined behavior. |
| 2681 | // FIXME: This logic isn't really right; we can safely inline functions with |
| 2682 | // indirectbr's as long as no other function or global references the |
| 2683 | // blockaddress of a block within the current function. |
| 2684 | HasIndirectBr = true; |
| 2685 | return false; |
| 2686 | } |
| 2687 | |
| 2688 | bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { |
| 2689 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2690 | // the inline cost of a resume instruction. |
| 2691 | return false; |
| 2692 | } |
| 2693 | |
| 2694 | bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { |
| 2695 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2696 | // the inline cost of a cleanupret instruction. |
| 2697 | return false; |
| 2698 | } |
| 2699 | |
| 2700 | bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { |
| 2701 | // FIXME: It's not clear that a single instruction is an accurate model for |
| 2702 | // the inline cost of a catchret instruction. |
| 2703 | return false; |
| 2704 | } |
| 2705 | |
| 2706 | bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { |
| 2707 |   // FIXME: It might be reasonable to discount the cost of instructions leading |
| 2708 | // to unreachable as they have the lowest possible impact on both runtime and |
| 2709 | // code size. |
| 2710 | return true; // No actual code is needed for unreachable. |
| 2711 | } |
| 2712 | |
| 2713 | bool CallAnalyzer::visitInstruction(Instruction &I) { |
| 2714 | // Some instructions are free. All of the free intrinsics can also be |
| 2715 | // handled by SROA, etc. |
| 2716 | if (TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency) == |
| 2717 | TargetTransformInfo::TCC_Free) |
| 2718 | return true; |
| 2719 | |
| 2720 | // We found something we don't understand or can't handle. Mark any SROA-able |
| 2721 | // values in the operand list as no longer viable. |
| 2722 | for (const Use &Op : I.operands()) |
| 2723 | disableSROA(V: Op); |
| 2724 | |
| 2725 | return false; |
| 2726 | } |
| 2727 | |
| 2728 | /// Analyze a basic block for its contribution to the inline cost. |
| 2729 | /// |
| 2730 | /// This method walks the analyzer over every instruction in the given basic |
| 2731 | /// block and accounts for their cost during inlining at this callsite. It |
| 2732 | /// aborts early if the threshold has been exceeded or an impossible to inline |
| 2733 | /// construct has been detected. It returns false if inlining is no longer |
| 2734 | /// viable, and true if inlining remains viable. |
| 2735 | InlineResult |
| 2736 | CallAnalyzer::analyzeBlock(BasicBlock *BB, |
| 2737 | const SmallPtrSetImpl<const Value *> &EphValues) { |
| 2738 | for (Instruction &I : *BB) { |
| 2739 |     // FIXME: Currently, the number of instructions in a function, regardless of |
| 2740 |     // our ability to simplify them during inlining to constants or dead code, |
| 2741 |     // is used by the vector bonus heuristic. As long as that's true, |
| 2742 | // we have to special case debug intrinsics here to prevent differences in |
| 2743 | // inlining due to debug symbols. Eventually, the number of unsimplified |
| 2744 | // instructions shouldn't factor into the cost computation, but until then, |
| 2745 | // hack around it here. |
| 2746 | // Similarly, skip pseudo-probes. |
| 2747 | if (I.isDebugOrPseudoInst()) |
| 2748 | continue; |
| 2749 | |
| 2750 | // Skip ephemeral values. |
| 2751 | if (EphValues.count(Ptr: &I)) |
| 2752 | continue; |
| 2753 | |
| 2754 | ++NumInstructions; |
| 2755 | if (isa<ExtractElementInst>(Val: I) || I.getType()->isVectorTy()) |
| 2756 | ++NumVectorInstructions; |
| 2757 | |
| 2758 | // If the instruction simplified to a constant, there is no cost to this |
| 2759 | // instruction. Visit the instructions using our InstVisitor to account for |
| 2760 | // all of the per-instruction logic. The visit tree returns true if we |
| 2761 | // consumed the instruction in any way, and false if the instruction's base |
| 2762 | // cost should count against inlining. |
| 2763 | onInstructionAnalysisStart(I: &I); |
| 2764 | |
| 2765 | if (Base::visit(I: &I)) |
| 2766 | ++NumInstructionsSimplified; |
| 2767 | else |
| 2768 | onMissedSimplification(); |
| 2769 | |
| 2770 | onInstructionAnalysisFinish(I: &I); |
| 2771 | using namespace ore; |
| 2772 |     // If visiting this instruction detected an uninlinable pattern, abort. |
| 2773 | InlineResult IR = InlineResult::success(); |
| 2774 | if (IsRecursiveCall && !AllowRecursiveCall) |
| 2775 | IR = InlineResult::failure(Reason: "recursive" ); |
| 2776 | else if (ExposesReturnsTwice) |
| 2777 | IR = InlineResult::failure(Reason: "exposes returns twice" ); |
| 2778 | else if (HasDynamicAlloca) |
| 2779 | IR = InlineResult::failure(Reason: "dynamic alloca" ); |
| 2780 | else if (HasIndirectBr) |
| 2781 | IR = InlineResult::failure(Reason: "indirect branch" ); |
| 2782 | else if (HasUninlineableIntrinsic) |
| 2783 | IR = InlineResult::failure(Reason: "uninlinable intrinsic" ); |
| 2784 | else if (InitsVargArgs) |
| 2785 | IR = InlineResult::failure(Reason: "varargs" ); |
| 2786 | if (!IR.isSuccess()) { |
| 2787 | if (ORE) |
| 2788 | ORE->emit(RemarkBuilder: [&]() { |
| 2789 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline" , |
| 2790 | &CandidateCall) |
| 2791 | << NV("Callee" , &F) << " has uninlinable pattern (" |
| 2792 | << NV("InlineResult" , IR.getFailureReason()) |
| 2793 | << ") and cost is not fully computed" ; |
| 2794 | }); |
| 2795 | return IR; |
| 2796 | } |
| 2797 | |
| 2798 | // If the caller is a recursive function then we don't want to inline |
| 2799 | // functions which allocate a lot of stack space because it would increase |
| 2800 | // the caller stack usage dramatically. |
| 2801 | if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) { |
| 2802 | auto IR = |
| 2803 | InlineResult::failure(Reason: "recursive and allocates too much stack space" ); |
| 2804 | if (ORE) |
| 2805 | ORE->emit(RemarkBuilder: [&]() { |
| 2806 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline" , |
| 2807 | &CandidateCall) |
| 2808 | << NV("Callee" , &F) << " is " |
| 2809 | << NV("InlineResult" , IR.getFailureReason()) |
| 2810 | << ". Cost is not fully computed" ; |
| 2811 | }); |
| 2812 | return IR; |
| 2813 | } |
| 2814 | |
| 2815 | if (shouldStop()) |
| 2816 | return InlineResult::failure( |
| 2817 | Reason: "Call site analysis is not favorable to inlining." ); |
| 2818 | } |
| 2819 | |
| 2820 | return InlineResult::success(); |
| 2821 | } |
| 2822 | |
| 2823 | /// Compute the base pointer and cumulative constant offsets for V. |
| 2824 | /// |
| 2825 | /// This strips all constant offsets off of V, leaving it the base pointer, and |
| 2826 | /// accumulates the total constant offset applied in the returned constant. It |
| 2827 | /// returns null if V is not a pointer, and returns the constant '0' if there |
| 2828 | /// are no constant offsets applied. |
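|  | /// |
|  | /// For illustration only (hypothetical IR): given |
|  | ///   %p = getelementptr inbounds [4 x i32], ptr %base, i64 0, i64 2 |
|  | /// this rewrites V to the base pointer %base and returns the byte offset 8. |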
| 2829 | ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { |
| 2830 | if (!V->getType()->isPointerTy()) |
| 2831 | return nullptr; |
| 2832 | |
| 2833 | unsigned AS = V->getType()->getPointerAddressSpace(); |
| 2834 | unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); |
| 2835 | APInt Offset = APInt::getZero(numBits: IntPtrWidth); |
| 2836 | |
| 2837 | // Even though we don't look through PHI nodes, we could be called on an |
| 2838 | // instruction in an unreachable block, which may be on a cycle. |
| 2839 | SmallPtrSet<Value *, 4> Visited; |
| 2840 | Visited.insert(Ptr: V); |
| 2841 | do { |
| 2842 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Val: V)) { |
| 2843 | if (!GEP->isInBounds() || !accumulateGEPOffset(GEP&: *GEP, Offset)) |
| 2844 | return nullptr; |
| 2845 | V = GEP->getPointerOperand(); |
| 2846 | } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: V)) { |
| 2847 | if (GA->isInterposable()) |
| 2848 | break; |
| 2849 | V = GA->getAliasee(); |
| 2850 | } else { |
| 2851 | break; |
| 2852 | } |
| 2853 | assert(V->getType()->isPointerTy() && "Unexpected operand type!" ); |
| 2854 | } while (Visited.insert(Ptr: V).second); |
| 2855 | |
| 2856 | Type *IdxPtrTy = DL.getIndexType(PtrTy: V->getType()); |
| 2857 | return cast<ConstantInt>(Val: ConstantInt::get(Ty: IdxPtrTy, V: Offset)); |
| 2858 | } |
| 2859 | |
| 2860 | /// Find dead blocks due to deleted CFG edges during inlining. |
| 2861 | /// |
| 2862 | /// If we know the successor of the current block, \p CurrBB, has to be \p |
| 2863 | /// NextBB, the other successors of \p CurrBB are dead if these successors have |
| 2864 | /// no live incoming CFG edges. If one block is found to be dead, we can |
| 2865 | /// continue growing the dead block list by checking the successors of the dead |
| 2866 | /// blocks to see if all their incoming edges are dead or not. |
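|  | /// |
|  | /// For illustration: if \p CurrBB ends in a conditional branch whose condition |
|  | /// folded to a constant for this call site, \p NextBB is the only live |
|  | /// successor; the untaken successor is dead unless it has another live |
|  | /// incoming edge, and blocks reachable only through it become dead as well. |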
| 2867 | void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { |
| 2868 | auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { |
| 2869 | // A CFG edge is dead if the predecessor is dead or the predecessor has a |
| 2870 | // known successor which is not the one under exam. |
| 2871 | if (DeadBlocks.count(Ptr: Pred)) |
| 2872 | return true; |
| 2873 | BasicBlock *KnownSucc = KnownSuccessors[Pred]; |
| 2874 | return KnownSucc && KnownSucc != Succ; |
| 2875 | }; |
| 2876 | |
| 2877 | auto IsNewlyDead = [&](BasicBlock *BB) { |
| 2878 | // If all the edges to a block are dead, the block is also dead. |
| 2879 | return (!DeadBlocks.count(Ptr: BB) && |
| 2880 | llvm::all_of(Range: predecessors(BB), |
| 2881 | P: [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); |
| 2882 | }; |
| 2883 | |
| 2884 | for (BasicBlock *Succ : successors(BB: CurrBB)) { |
| 2885 | if (Succ == NextBB || !IsNewlyDead(Succ)) |
| 2886 | continue; |
| 2887 | SmallVector<BasicBlock *, 4> NewDead; |
| 2888 | NewDead.push_back(Elt: Succ); |
| 2889 | while (!NewDead.empty()) { |
| 2890 | BasicBlock *Dead = NewDead.pop_back_val(); |
| 2891 | if (DeadBlocks.insert(Ptr: Dead).second) |
| 2892 | // Continue growing the dead block lists. |
| 2893 | for (BasicBlock *S : successors(BB: Dead)) |
| 2894 | if (IsNewlyDead(S)) |
| 2895 | NewDead.push_back(Elt: S); |
| 2896 | } |
| 2897 | } |
| 2898 | } |
| 2899 | |
| 2900 | /// Analyze a call site for potential inlining. |
| 2901 | /// |
| 2902 | /// Returns true if inlining this call is viable, and false if it is not |
| 2903 | /// viable. It computes the cost and adjusts the threshold based on numerous |
| 2904 | /// factors and heuristics. If this method returns false but the computed cost |
| 2905 | /// is below the computed threshold, then inlining was forcibly disabled by |
| 2906 | /// some artifact of the routine. |
| 2907 | InlineResult CallAnalyzer::analyze() { |
| 2908 | ++NumCallsAnalyzed; |
| 2909 | |
| 2910 | auto Result = onAnalysisStart(); |
| 2911 | if (!Result.isSuccess()) |
| 2912 | return Result; |
| 2913 | |
| 2914 | if (F.empty()) |
| 2915 | return InlineResult::success(); |
| 2916 | |
| 2917 | Function *Caller = CandidateCall.getFunction(); |
| 2918 | // Check if the caller function is recursive itself. |
| 2919 | for (User *U : Caller->users()) { |
| 2920 | CallBase *Call = dyn_cast<CallBase>(Val: U); |
| 2921 | if (Call && Call->getFunction() == Caller) { |
| 2922 | IsCallerRecursive = true; |
| 2923 | break; |
| 2924 | } |
| 2925 | } |
| 2926 | |
| 2927 | // Populate our simplified values by mapping from function arguments to call |
| 2928 | // arguments with known important simplifications. |
| 2929 | auto CAI = CandidateCall.arg_begin(); |
| 2930 | for (Argument &FAI : F.args()) { |
| 2931 | assert(CAI != CandidateCall.arg_end()); |
| 2932 | SimplifiedValues[&FAI] = *CAI; |
| 2933 | if (isa<Constant>(Val: *CAI)) |
| 2934 | ++NumConstantArgs; |
| 2935 | |
| 2936 | Value *PtrArg = *CAI; |
| 2937 | if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(V&: PtrArg)) { |
| 2938 | ConstantOffsetPtrs[&FAI] = std::make_pair(x&: PtrArg, y: C->getValue()); |
| 2939 | |
| 2940 | // We can SROA any pointer arguments derived from alloca instructions. |
| 2941 | if (auto *SROAArg = dyn_cast<AllocaInst>(Val: PtrArg)) { |
| 2942 | SROAArgValues[&FAI] = SROAArg; |
| 2943 | onInitializeSROAArg(Arg: SROAArg); |
| 2944 | EnabledSROAAllocas.insert(V: SROAArg); |
| 2945 | } |
| 2946 | } |
| 2947 | ++CAI; |
| 2948 | } |
| 2949 | NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); |
| 2950 | NumAllocaArgs = SROAArgValues.size(); |
| 2951 | |
| 2952 | // Collecting the ephemeral values of `F` can be expensive, so use the |
| 2953 | // ephemeral values cache if available. |
| 2954 | SmallPtrSet<const Value *, 32> EphValuesStorage; |
| 2955 | const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage; |
| 2956 | if (GetEphValuesCache) |
| 2957 | EphValues = &GetEphValuesCache(F).ephValues(); |
| 2958 | else |
| 2959 | CodeMetrics::collectEphemeralValues(L: &F, AC: &GetAssumptionCache(F), |
| 2960 | EphValues&: EphValuesStorage); |
| 2961 | |
| 2962 | // The worklist of live basic blocks in the callee *after* inlining. We avoid |
| 2963 | // adding basic blocks of the callee which can be proven to be dead for this |
| 2964 | // particular call site in order to get more accurate cost estimates. This |
| 2965 | // requires a somewhat heavyweight iteration pattern: we need to walk the |
| 2966 |   // basic blocks in a breadth-first order as we insert live successors. To |
| 2967 |   // accomplish this we use a small-size optimized SetVector, favoring small |
| 2968 |   // iteration counts because we exit early once we cross our threshold. |
| 2969 | typedef SmallSetVector<BasicBlock *, 16> BBSetVector; |
| 2970 | BBSetVector BBWorklist; |
| 2971 | BBWorklist.insert(X: &F.getEntryBlock()); |
| 2972 | |
| 2973 | // Note that we *must not* cache the size, this loop grows the worklist. |
| 2974 | for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { |
| 2975 | if (shouldStop()) |
| 2976 | break; |
| 2977 | |
| 2978 | BasicBlock *BB = BBWorklist[Idx]; |
| 2979 | if (BB->empty()) |
| 2980 | continue; |
| 2981 | |
| 2982 | onBlockStart(BB); |
| 2983 | |
| 2984 | // Disallow inlining a blockaddress. |
| 2985 | // A blockaddress only has defined behavior for an indirect branch in the |
| 2986 | // same function, and we do not currently support inlining indirect |
| 2987 | // branches. But, the inliner may not see an indirect branch that ends up |
| 2988 | // being dead code at a particular call site. If the blockaddress escapes |
| 2989 | // the function, e.g., via a global variable, inlining may lead to an |
| 2990 | // invalid cross-function reference. |
| 2991 | // FIXME: pr/39560: continue relaxing this overt restriction. |
| 2992 | if (BB->hasAddressTaken()) |
| 2993 | return InlineResult::failure(Reason: "blockaddress used" ); |
| 2994 | |
| 2995 | // Analyze the cost of this block. If we blow through the threshold, this |
| 2996 |     // returns false, and we can bail out. |
| 2997 | InlineResult IR = analyzeBlock(BB, EphValues: *EphValues); |
| 2998 | if (!IR.isSuccess()) |
| 2999 | return IR; |
| 3000 | |
| 3001 | Instruction *TI = BB->getTerminator(); |
| 3002 | |
| 3003 |     // Add in the live successors by first checking whether we have a terminator |
| 3004 | // that may be simplified based on the values simplified by this call. |
| 3005 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
| 3006 | if (BI->isConditional()) { |
| 3007 | Value *Cond = BI->getCondition(); |
| 3008 | if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(V: Cond)) { |
| 3009 | BasicBlock *NextBB = BI->getSuccessor(i: SimpleCond->isZero() ? 1 : 0); |
| 3010 | BBWorklist.insert(X: NextBB); |
| 3011 | KnownSuccessors[BB] = NextBB; |
| 3012 | findDeadBlocks(CurrBB: BB, NextBB); |
| 3013 | continue; |
| 3014 | } |
| 3015 | } |
| 3016 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
| 3017 | Value *Cond = SI->getCondition(); |
| 3018 | if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(V: Cond)) { |
| 3019 | BasicBlock *NextBB = SI->findCaseValue(C: SimpleCond)->getCaseSuccessor(); |
| 3020 | BBWorklist.insert(X: NextBB); |
| 3021 | KnownSuccessors[BB] = NextBB; |
| 3022 | findDeadBlocks(CurrBB: BB, NextBB); |
| 3023 | continue; |
| 3024 | } |
| 3025 | } |
| 3026 | |
| 3027 | // If we're unable to select a particular successor, just count all of |
| 3028 | // them. |
| 3029 | BBWorklist.insert_range(R: successors(BB)); |
| 3030 | |
| 3031 | onBlockAnalyzed(BB); |
| 3032 | } |
| 3033 | |
| 3034 | // If this is a noduplicate call, we can still inline as long as |
| 3035 | // inlining this would cause the removal of the caller (so the instruction |
| 3036 | // is not actually duplicated, just moved). |
| 3037 | if (!isSoleCallToLocalFunction(CB: CandidateCall, Callee: F) && ContainsNoDuplicateCall) |
| 3038 | return InlineResult::failure(Reason: "noduplicate" ); |
| 3039 | |
| 3040 | // If the callee's stack size exceeds the user-specified threshold, |
| 3041 | // do not let it be inlined. |
| 3042 | // The command line option overrides a limit set in the function attributes. |
| 3043 | size_t FinalStackSizeThreshold = StackSizeThreshold; |
| 3044 | if (!StackSizeThreshold.getNumOccurrences()) |
| 3045 | if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt( |
| 3046 | F: Caller, AttrKind: InlineConstants::MaxInlineStackSizeAttributeName)) |
| 3047 | FinalStackSizeThreshold = *AttrMaxStackSize; |
| 3048 | if (AllocatedSize > FinalStackSizeThreshold) |
| 3049 | return InlineResult::failure(Reason: "stacksize" ); |
| 3050 | |
| 3051 | return finalizeAnalysis(); |
| 3052 | } |
| 3053 | |
| 3054 | void InlineCostCallAnalyzer::print(raw_ostream &OS) { |
| 3055 | #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n" |
| 3056 | if (PrintInstructionComments) |
| 3057 | F.print(OS, AAW: &Writer); |
| 3058 | DEBUG_PRINT_STAT(NumConstantArgs); |
| 3059 | DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); |
| 3060 | DEBUG_PRINT_STAT(NumAllocaArgs); |
| 3061 | DEBUG_PRINT_STAT(NumConstantPtrCmps); |
| 3062 | DEBUG_PRINT_STAT(NumConstantPtrDiffs); |
| 3063 | DEBUG_PRINT_STAT(NumInstructionsSimplified); |
| 3064 | DEBUG_PRINT_STAT(NumInstructions); |
| 3065 | DEBUG_PRINT_STAT(NumInlineAsmInstructions); |
| 3066 | DEBUG_PRINT_STAT(SROACostSavings); |
| 3067 | DEBUG_PRINT_STAT(SROACostSavingsLost); |
| 3068 | DEBUG_PRINT_STAT(LoadEliminationCost); |
| 3069 | DEBUG_PRINT_STAT(ContainsNoDuplicateCall); |
| 3070 | DEBUG_PRINT_STAT(Cost); |
| 3071 | DEBUG_PRINT_STAT(Threshold); |
| 3072 | #undef DEBUG_PRINT_STAT |
| 3073 | } |
| 3074 | |
| 3075 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 3076 | /// Dump stats about this call's analysis. |
| 3077 | LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } |
| 3078 | #endif |
| 3079 | |
| 3080 | /// Test that there are no attribute conflicts between Caller and Callee |
| 3081 | /// that prevent inlining. |
| 3082 | static bool functionsHaveCompatibleAttributes( |
| 3083 | Function *Caller, Function *Callee, TargetTransformInfo &TTI, |
| 3084 | function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) { |
| 3085 | // Note that CalleeTLI must be a copy not a reference. The legacy pass manager |
| 3086 | // caches the most recently created TLI in the TargetLibraryInfoWrapperPass |
| 3087 | // object, and always returns the same object (which is overwritten on each |
| 3088 | // GetTLI call). Therefore we copy the first result. |
| 3089 | auto CalleeTLI = GetTLI(*Callee); |
| 3090 | return (IgnoreTTIInlineCompatible || |
| 3091 | TTI.areInlineCompatible(Caller, Callee)) && |
| 3092 | GetTLI(*Caller).areInlineCompatible(CalleeTLI, |
| 3093 | AllowCallerSuperset: InlineCallerSupersetNoBuiltin) && |
| 3094 | AttributeFuncs::areInlineCompatible(Caller: *Caller, Callee: *Callee); |
| 3095 | } |
| 3096 | |
| 3097 | int llvm::getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, |
| 3098 | const DataLayout &DL) { |
| 3099 | int64_t Cost = 0; |
| 3100 | for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { |
| 3101 | if (Call.isByValArgument(ArgNo: I)) { |
| 3102 | // We approximate the number of loads and stores needed by dividing the |
| 3103 | // size of the byval type by the target's pointer size. |
| 3104 | PointerType *PTy = cast<PointerType>(Val: Call.getArgOperand(i: I)->getType()); |
| 3105 | unsigned TypeSize = DL.getTypeSizeInBits(Ty: Call.getParamByValType(ArgNo: I)); |
| 3106 | unsigned AS = PTy->getAddressSpace(); |
| 3107 | unsigned PointerSize = DL.getPointerSizeInBits(AS); |
| 3108 | // Ceiling division. |
| 3109 | unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; |
| 3110 | |
| 3111 | // If it generates more than 8 stores it is likely to be expanded as an |
| 3112 | // inline memcpy so we take that as an upper bound. Otherwise we assume |
| 3113 | // one load and one store per word copied. |
| 3114 | // FIXME: The maxStoresPerMemcpy setting from the target should be used |
| 3115 | // here instead of a magic number of 8, but it's not available via |
| 3116 | // DataLayout. |
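|  |       // Worked example (illustrative): a 64-byte byval aggregate with 64-bit |
|  |       // pointers gives TypeSize = 512 and PointerSize = 64, so NumStores = 8 |
|  |       // (exactly at the cap below) and the argument adds 2 * 8 * InstrCost. |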
| 3117 | NumStores = std::min(a: NumStores, b: 8U); |
| 3118 | |
| 3119 | Cost += 2 * NumStores * InstrCost; |
| 3120 | } else { |
| 3121 | // For non-byval arguments subtract off one instruction per call |
| 3122 | // argument. |
| 3123 | Cost += InstrCost; |
| 3124 | } |
| 3125 | } |
| 3126 | // The call instruction also disappears after inlining. |
| 3127 | Cost += InstrCost; |
| 3128 | Cost += TTI.getInlineCallPenalty(F: Call.getCaller(), Call, DefaultCallPenalty: CallPenalty); |
| 3129 | |
| 3130 | return std::min<int64_t>(a: Cost, INT_MAX); |
| 3131 | } |
| 3132 | |
| 3133 | InlineCost llvm::getInlineCost( |
| 3134 | CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, |
| 3135 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3136 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3137 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3138 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, |
| 3139 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) { |
| 3140 | return getInlineCost(Call, Callee: Call.getCalledFunction(), Params, CalleeTTI, |
| 3141 | GetAssumptionCache, GetTLI, GetBFI, PSI, ORE, |
| 3142 | GetEphValuesCache); |
| 3143 | } |
| 3144 | |
| 3145 | std::optional<int> llvm::getInliningCostEstimate( |
| 3146 | CallBase &Call, TargetTransformInfo &CalleeTTI, |
| 3147 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3148 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3149 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3150 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { |
| 3151 | const InlineParams Params = {/* DefaultThreshold*/ 0, |
| 3152 | /*HintThreshold*/ {}, |
| 3153 | /*ColdThreshold*/ {}, |
| 3154 | /*OptSizeThreshold*/ {}, |
| 3155 | /*OptMinSizeThreshold*/ {}, |
| 3156 | /*HotCallSiteThreshold*/ {}, |
| 3157 | /*LocallyHotCallSiteThreshold*/ {}, |
| 3158 | /*ColdCallSiteThreshold*/ {}, |
| 3159 | /*ComputeFullInlineCost*/ true, |
| 3160 | /*EnableDeferral*/ true}; |
| 3161 | |
| 3162 | InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, |
| 3163 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true, |
| 3164 | /*IgnoreThreshold*/ true); |
| 3165 | auto R = CA.analyze(); |
| 3166 | if (!R.isSuccess()) |
| 3167 | return std::nullopt; |
| 3168 | return CA.getCost(); |
| 3169 | } |
| 3170 | |
| 3171 | std::optional<InlineCostFeatures> llvm::getInliningCostFeatures( |
| 3172 | CallBase &Call, TargetTransformInfo &CalleeTTI, |
| 3173 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3174 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3175 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3176 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { |
| 3177 | InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI, |
| 3178 | PSI, ORE, *Call.getCalledFunction(), Call); |
| 3179 | auto R = CFA.analyze(); |
| 3180 | if (!R.isSuccess()) |
| 3181 | return std::nullopt; |
| 3182 | return CFA.features(); |
| 3183 | } |
| 3184 | |
| 3185 | std::optional<InlineResult> llvm::getAttributeBasedInliningDecision( |
| 3186 | CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, |
| 3187 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { |
| 3188 | |
| 3189 | // Cannot inline indirect calls. |
| 3190 | if (!Callee) |
| 3191 | return InlineResult::failure(Reason: "indirect call" ); |
| 3192 | |
| 3193 |   // When a callee coroutine function is inlined into a caller coroutine |
| 3194 |   // function before the coro-split pass, the coro-early pass cannot handle it |
| 3195 |   // well. So we won't inline a coroutine function that has not yet been |
| 3196 |   // split. |
| 3197 | if (Callee->isPresplitCoroutine()) |
| 3198 | return InlineResult::failure(Reason: "unsplited coroutine call" ); |
| 3199 | |
| 3200 |   // Never inline calls with byval arguments that do not have the alloca |
| 3201 | // address space. Since byval arguments can be replaced with a copy to an |
| 3202 | // alloca, the inlined code would need to be adjusted to handle that the |
| 3203 | // argument is in the alloca address space (so it is a little bit complicated |
| 3204 | // to solve). |
| 3205 | unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace(); |
| 3206 | for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) |
| 3207 | if (Call.isByValArgument(ArgNo: I)) { |
| 3208 | PointerType *PTy = cast<PointerType>(Val: Call.getArgOperand(i: I)->getType()); |
| 3209 | if (PTy->getAddressSpace() != AllocaAS) |
| 3210 | return InlineResult::failure(Reason: "byval arguments without alloca" |
| 3211 | " address space" ); |
| 3212 | } |
| 3213 | |
| 3214 | // Calls to functions with always-inline attributes should be inlined |
| 3215 | // whenever possible. |
| 3216 | if (Call.hasFnAttr(Kind: Attribute::AlwaysInline)) { |
| 3217 | if (Call.getAttributes().hasFnAttr(Kind: Attribute::NoInline)) |
| 3218 | return InlineResult::failure(Reason: "noinline call site attribute" ); |
| 3219 | |
| 3220 | auto IsViable = isInlineViable(Callee&: *Callee); |
| 3221 | if (IsViable.isSuccess()) |
| 3222 | return InlineResult::success(); |
| 3223 | return InlineResult::failure(Reason: IsViable.getFailureReason()); |
| 3224 | } |
| 3225 | |
| 3226 | // Never inline functions with conflicting attributes (unless callee has |
| 3227 | // always-inline attribute). |
| 3228 | Function *Caller = Call.getCaller(); |
| 3229 | if (!functionsHaveCompatibleAttributes(Caller, Callee, TTI&: CalleeTTI, GetTLI)) |
| 3230 | return InlineResult::failure(Reason: "conflicting attributes" ); |
| 3231 | |
| 3232 | // Don't inline this call if the caller has the optnone attribute. |
| 3233 | if (Caller->hasOptNone()) |
| 3234 | return InlineResult::failure(Reason: "optnone attribute" ); |
| 3235 | |
| 3236 | // Don't inline a function that treats null pointer as valid into a caller |
| 3237 | // that does not have this attribute. |
| 3238 | if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) |
| 3239 | return InlineResult::failure(Reason: "nullptr definitions incompatible" ); |
| 3240 | |
| 3241 | // Don't inline functions which can be interposed at link-time. |
| 3242 | if (Callee->isInterposable()) |
| 3243 | return InlineResult::failure(Reason: "interposable" ); |
| 3244 | |
| 3245 | // Don't inline functions marked noinline. |
| 3246 | if (Callee->hasFnAttribute(Kind: Attribute::NoInline)) |
| 3247 | return InlineResult::failure(Reason: "noinline function attribute" ); |
| 3248 | |
| 3249 | // Don't inline call sites marked noinline. |
| 3250 | if (Call.isNoInline()) |
| 3251 | return InlineResult::failure(Reason: "noinline call site attribute" ); |
| 3252 | |
| 3253 | // Don't inline functions that are loader replaceable. |
| 3254 | if (Callee->hasFnAttribute(Kind: "loader-replaceable" )) |
| 3255 | return InlineResult::failure(Reason: "loader replaceable function attribute" ); |
| 3256 | |
| 3257 | return std::nullopt; |
| 3258 | } |
| 3259 | |
| 3260 | InlineCost llvm::getInlineCost( |
| 3261 | CallBase &Call, Function *Callee, const InlineParams &Params, |
| 3262 | TargetTransformInfo &CalleeTTI, |
| 3263 | function_ref<AssumptionCache &(Function &)> GetAssumptionCache, |
| 3264 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI, |
| 3265 | function_ref<BlockFrequencyInfo &(Function &)> GetBFI, |
| 3266 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, |
| 3267 | function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) { |
| 3268 | |
| 3269 | auto UserDecision = |
| 3270 | llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); |
| 3271 | |
| 3272 | if (UserDecision) { |
| 3273 | if (UserDecision->isSuccess()) |
| 3274 | return llvm::InlineCost::getAlways(Reason: "always inline attribute" ); |
| 3275 | return llvm::InlineCost::getNever(Reason: UserDecision->getFailureReason()); |
| 3276 | } |
| 3277 | |
| 3278 | if (InlineAllViableCalls && isInlineViable(Callee&: *Callee).isSuccess()) |
| 3279 | return llvm::InlineCost::getAlways( |
| 3280 | Reason: "Inlining forced by -inline-all-viable-calls" ); |
| 3281 | |
| 3282 | LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() |
| 3283 | << "... (caller:" << Call.getCaller()->getName() |
| 3284 | << ")\n" ); |
| 3285 | |
| 3286 | InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, |
| 3287 | GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, |
| 3288 | /*BoostIndirect=*/true, /*IgnoreThreshold=*/false, |
| 3289 | GetEphValuesCache); |
| 3290 | InlineResult ShouldInline = CA.analyze(); |
| 3291 | |
| 3292 | LLVM_DEBUG(CA.dump()); |
| 3293 | |
| 3294 | // Always make cost benefit based decision explicit. |
| 3295 | // We use always/never here since threshold is not meaningful, |
| 3296 | // as it's not what drives cost-benefit analysis. |
| 3297 | if (CA.wasDecidedByCostBenefit()) { |
| 3298 | if (ShouldInline.isSuccess()) |
| 3299 | return InlineCost::getAlways(Reason: "benefit over cost" , |
| 3300 | CostBenefit: CA.getCostBenefitPair()); |
| 3301 | else |
| 3302 | return InlineCost::getNever(Reason: "cost over benefit" , CostBenefit: CA.getCostBenefitPair()); |
| 3303 | } |
| 3304 | |
| 3305 | if (CA.wasDecidedByCostThreshold()) |
| 3306 | return InlineCost::get(Cost: CA.getCost(), Threshold: CA.getThreshold(), |
| 3307 | StaticBonus: CA.getStaticBonusApplied()); |
| 3308 | |
| 3309 | // No details on how the decision was made, simply return always or never. |
| 3310 | return ShouldInline.isSuccess() |
| 3311 | ? InlineCost::getAlways(Reason: "empty function" ) |
| 3312 | : InlineCost::getNever(Reason: ShouldInline.getFailureReason()); |
| 3313 | } |
| 3314 | |
| 3315 | InlineResult llvm::isInlineViable(Function &F) { |
| 3316 | bool ReturnsTwice = F.hasFnAttribute(Kind: Attribute::ReturnsTwice); |
| 3317 | for (BasicBlock &BB : F) { |
| 3318 | // Disallow inlining of functions which contain indirect branches. |
| 3319 | if (isa<IndirectBrInst>(Val: BB.getTerminator())) |
| 3320 | return InlineResult::failure(Reason: "contains indirect branches" ); |
| 3321 | |
| 3322 | // Disallow inlining of blockaddresses. |
| 3323 | if (BB.hasAddressTaken()) |
| 3324 | return InlineResult::failure(Reason: "blockaddress used" ); |
| 3325 | |
| 3326 | for (auto &II : BB) { |
| 3327 | CallBase *Call = dyn_cast<CallBase>(Val: &II); |
| 3328 | if (!Call) |
| 3329 | continue; |
| 3330 | |
| 3331 | // Disallow recursive calls. |
| 3332 | Function *Callee = Call->getCalledFunction(); |
| 3333 | if (&F == Callee) |
| 3334 | return InlineResult::failure(Reason: "recursive call" ); |
| 3335 | |
| 3336 | // Disallow calls which expose returns-twice to a function not previously |
| 3337 | // attributed as such. |
| 3338 | if (!ReturnsTwice && isa<CallInst>(Val: Call) && |
| 3339 | cast<CallInst>(Val: Call)->canReturnTwice()) |
| 3340 | return InlineResult::failure(Reason: "exposes returns-twice attribute" ); |
| 3341 | |
| 3342 | if (Callee) |
| 3343 | switch (Callee->getIntrinsicID()) { |
| 3344 | default: |
| 3345 | break; |
| 3346 | case llvm::Intrinsic::icall_branch_funnel: |
| 3347 | // Disallow inlining of @llvm.icall.branch.funnel because current |
| 3348 | // backend can't separate call targets from call arguments. |
| 3349 | return InlineResult::failure( |
| 3350 | Reason: "disallowed inlining of @llvm.icall.branch.funnel" ); |
| 3351 | case llvm::Intrinsic::localescape: |
| 3352 | // Disallow inlining functions that call @llvm.localescape. Doing this |
| 3353 | // correctly would require major changes to the inliner. |
| 3354 | return InlineResult::failure( |
| 3355 | Reason: "disallowed inlining of @llvm.localescape" ); |
| 3356 | case llvm::Intrinsic::vastart: |
| 3357 | // Disallow inlining of functions that initialize VarArgs with |
| 3358 | // va_start. |
| 3359 | return InlineResult::failure( |
| 3360 | Reason: "contains VarArgs initialized with va_start" ); |
| 3361 | } |
| 3362 | } |
| 3363 | } |
| 3364 | |
| 3365 | return InlineResult::success(); |
| 3366 | } |
| 3367 | |
| 3368 | // APIs to create InlineParams based on command line flags and/or other |
| 3369 | // parameters. |
| 3370 | |
| 3371 | InlineParams llvm::getInlineParams(int Threshold) { |
| 3372 | InlineParams Params; |
| 3373 | |
| 3374 | // This field is the threshold to use for a callee by default. This is |
| 3375 | // derived from one or more of: |
| 3376 | // * optimization or size-optimization levels, |
| 3377 | // * a value passed to createFunctionInliningPass function, or |
| 3378 | // * the -inline-threshold flag. |
| 3379 | // If the -inline-threshold flag is explicitly specified, that is used |
| 3380 | // irrespective of anything else. |
| 3381 | if (InlineThreshold.getNumOccurrences() > 0) |
| 3382 | Params.DefaultThreshold = InlineThreshold; |
| 3383 | else |
| 3384 | Params.DefaultThreshold = Threshold; |
| 3385 | |
| 3386 | // Set the HintThreshold knob from the -inlinehint-threshold. |
| 3387 | Params.HintThreshold = HintThreshold; |
| 3388 | |
| 3389 | // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. |
| 3390 | Params.HotCallSiteThreshold = HotCallSiteThreshold; |
| 3391 | |
| 3392 | // If the -locally-hot-callsite-threshold is explicitly specified, use it to |
| 3393 | // populate LocallyHotCallSiteThreshold. Later, we populate |
| 3394 | // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if |
| 3395 | // we know that optimization level is O3 (in the getInlineParams variant that |
| 3396 | // takes the opt and size levels). |
| 3397 | // FIXME: Remove this check (and make the assignment unconditional) after |
| 3398 | // addressing size regression issues at O2. |
| 3399 | if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) |
| 3400 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; |
| 3401 | |
| 3402 | // Set the ColdCallSiteThreshold knob from the |
| 3403 | // -inline-cold-callsite-threshold. |
| 3404 | Params.ColdCallSiteThreshold = ColdCallSiteThreshold; |
| 3405 | |
| 3406 | // Set the OptMinSizeThreshold and OptSizeThreshold params only if the |
| 3407 | // -inlinehint-threshold commandline option is not explicitly given. If that |
| 3408 | // option is present, then its value applies even for callees with size and |
| 3409 | // minsize attributes. |
| 3410 | // If the -inline-threshold is not specified, set the ColdThreshold from the |
| 3411 | // -inlinecold-threshold even if it is not explicitly passed. If |
| 3412 | // -inline-threshold is specified, then -inlinecold-threshold needs to be |
| 3413 | // explicitly specified to set the ColdThreshold knob |
| 3414 | if (InlineThreshold.getNumOccurrences() == 0) { |
| 3415 | Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; |
| 3416 | Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; |
| 3417 | Params.ColdThreshold = ColdThreshold; |
| 3418 | } else if (ColdThreshold.getNumOccurrences() > 0) { |
| 3419 | Params.ColdThreshold = ColdThreshold; |
| 3420 | } |
| 3421 | return Params; |
| 3422 | } |
| 3423 | |
| 3424 | InlineParams llvm::getInlineParams() { |
| 3425 | return getInlineParams(Threshold: DefaultThreshold); |
| 3426 | } |
| 3427 | |
| 3428 | // Compute the default threshold for inlining based on the opt level and the |
| 3429 | // size opt level. |
| 3430 | static int computeThresholdFromOptLevels(unsigned OptLevel, |
| 3431 | unsigned SizeOptLevel) { |
| 3432 | if (OptLevel > 2) |
| 3433 | return InlineConstants::OptAggressiveThreshold; |
| 3434 | if (SizeOptLevel == 1) // -Os |
| 3435 | return InlineConstants::OptSizeThreshold; |
| 3436 | if (SizeOptLevel == 2) // -Oz |
| 3437 | return InlineConstants::OptMinSizeThreshold; |
| 3438 | return DefaultThreshold; |
| 3439 | } |
| 3440 | |
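|  | // For example, getInlineParams(3, 0) seeds DefaultThreshold with |
|  | // OptAggressiveThreshold, getInlineParams(2, 2) (i.e. -Oz) uses |
|  | // OptMinSizeThreshold, and an explicit -inline-threshold overrides either. |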
| 3441 | InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { |
| 3442 | auto Params = |
| 3443 | getInlineParams(Threshold: computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); |
| 3444 | // At O3, use the value of -locally-hot-callsite-threshold option to populate |
| 3445 | // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only |
| 3446 | // when it is specified explicitly. |
| 3447 | if (OptLevel > 2) |
| 3448 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; |
| 3449 | return Params; |
| 3450 | } |
| 3451 | |
| 3452 | PreservedAnalyses |
| 3453 | InlineCostAnnotationPrinterPass::run(Function &F, |
| 3454 | FunctionAnalysisManager &FAM) { |
| 3455 | PrintInstructionComments = true; |
| 3456 | std::function<AssumptionCache &(Function &)> GetAssumptionCache = |
| 3457 | [&](Function &F) -> AssumptionCache & { |
| 3458 | return FAM.getResult<AssumptionAnalysis>(IR&: F); |
| 3459 | }; |
| 3460 | |
| 3461 | auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F); |
| 3462 | ProfileSummaryInfo *PSI = |
| 3463 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent()); |
| 3464 | const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F); |
| 3465 | |
| 3466 | // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. |
| 3467 | // In the current implementation, the type of InlineParams doesn't matter as |
| 3468 | // the pass serves only for verification of inliner's decisions. |
| 3469 | // We can add a flag which determines InlineParams for this run. Right now, |
| 3470 | // the default InlineParams are used. |
| 3471 | const InlineParams Params = llvm::getInlineParams(); |
| 3472 | for (BasicBlock &BB : F) { |
| 3473 | for (Instruction &I : BB) { |
| 3474 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) { |
| 3475 | Function *CalledFunction = CB->getCalledFunction(); |
| 3476 | if (!CalledFunction || CalledFunction->isDeclaration()) |
| 3477 | continue; |
| 3478 | OptimizationRemarkEmitter ORE(CalledFunction); |
| 3479 | InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI, |
| 3480 | GetAssumptionCache, nullptr, nullptr, PSI, |
| 3481 | &ORE); |
| 3482 | ICCA.analyze(); |
| 3483 | OS << " Analyzing call of " << CalledFunction->getName() |
| 3484 | << "... (caller:" << CB->getCaller()->getName() << ")\n" ; |
| 3485 | ICCA.print(OS); |
| 3486 | OS << "\n" ; |
| 3487 | } |
| 3488 | } |
| 3489 | } |
| 3490 | return PreservedAnalyses::all(); |
| 3491 | } |
| 3492 | |