//===- ModuleInliner.cpp - Code related to module inliner -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls at the module level. It doesn't need any information about
// SCCs or the call graph, which is different from the SCC inliner. The
// decisions of which calls are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//
15
16#include "llvm/Transforms/IPO/ModuleInliner.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/AssumptionCache.h"
22#include "llvm/Analysis/BlockFrequencyInfo.h"
23#include "llvm/Analysis/CtxProfAnalysis.h"
24#include "llvm/Analysis/InlineAdvisor.h"
25#include "llvm/Analysis/InlineCost.h"
26#include "llvm/Analysis/InlineOrder.h"
27#include "llvm/Analysis/OptimizationRemarkEmitter.h"
28#include "llvm/Analysis/ProfileSummaryInfo.h"
29#include "llvm/Analysis/ReplayInlineAdvisor.h"
30#include "llvm/Analysis/TargetLibraryInfo.h"
31#include "llvm/IR/DiagnosticInfo.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/InstIterator.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Module.h"
37#include "llvm/IR/PassManager.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/raw_ostream.h"
41#include "llvm/Transforms/Utils/CallPromotionUtils.h"
42#include "llvm/Transforms/Utils/Cloning.h"
43#include <cassert>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "module-inline"
48
49STATISTIC(NumInlined, "Number of functions inlined");
50STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
51
52static cl::opt<bool> CtxProfPromoteAlwaysInline(
53 "ctx-prof-promote-alwaysinline", cl::init(Val: false), cl::Hidden,
54 cl::desc("If using a contextual profile in this module, and an indirect "
55 "call target is marked as alwaysinline, perform indirect call "
56 "promotion for that target. If multiple targets for an indirect "
57 "call site fit this description, they are all promoted."));
58
59InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
60 FunctionAnalysisManager &FAM,
61 Module &M) {
62 if (OwnedAdvisor)
63 return *OwnedAdvisor;
64
65 auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(IR&: M);
66 if (!IAA) {
67 // It should still be possible to run the inliner as a stand-alone module
68 // pass, for test scenarios. In that case, we default to the
69 // DefaultInlineAdvisor, which doesn't need to keep state between module
70 // pass runs. It also uses just the default InlineParams. In this case, we
71 // need to use the provided FAM, which is valid for the duration of the
72 // inliner pass, and thus the lifetime of the owned advisor. The one we
73 // would get from the MAM can be invalidated as a result of the inliner's
74 // activity.
75 OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
76 args&: M, args&: FAM, args: Params, args: InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::ModuleInliner});
77
78 return *OwnedAdvisor;
79 }
80 assert(IAA->getAdvisor() &&
81 "Expected a present InlineAdvisorAnalysis also have an "
82 "InlineAdvisor initialized");
83 return *IAA->getAdvisor();
84}
85
86static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
87 LibFunc LF;
88
89 // Either this is a normal library function or a "vectorizable"
90 // function. Not using the VFDatabase here because this query
91 // is related only to libraries handled via the TLI.
92 return TLI.getLibFunc(FDecl: F, F&: LF) ||
93 TLI.isKnownVectorFunctionInLibrary(F: F.getName());
94}
95
96PreservedAnalyses ModuleInlinerPass::run(Module &M,
97 ModuleAnalysisManager &MAM) {
98 LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
99
100 auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(IR&: M);
101 if (!IAA.tryCreate(Params, Mode, ReplaySettings: {},
102 IC: InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::ModuleInliner})) {
103 M.getContext().emitError(
104 ErrorStr: "Could not setup Inlining Advisor for the requested "
105 "mode and/or options");
106 return PreservedAnalyses::all();
107 }
108
109 auto &CtxProf = MAM.getResult<CtxProfAnalysis>(IR&: M);
110
111 bool Changed = false;
112
113 ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(IR&: M);
114
115 FunctionAnalysisManager &FAM =
116 MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
117
118 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
119 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
120 };
121
122 InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
123 Advisor.onPassEntry();
124
125 llvm::scope_exit AdvisorOnExit([&] { Advisor.onPassExit(); });
126
127 // In the module inliner, a priority-based worklist is used for calls across
128 // the entire Module. With this module inliner, the inline order is not
129 // limited to bottom-up order. More globally scope inline order is enabled.
130 // Also, the inline deferral logic become unnecessary in this module inliner.
131 // It is possible to use other priority heuristics, e.g. profile-based
132 // heuristic.
133 //
134 // TODO: Here is a huge amount duplicate code between the module inliner and
135 // the SCC inliner, which need some refactoring.
136 auto Calls = getInlineOrder(FAM, Params, MAM, M);
137 assert(Calls != nullptr && "Expected an initialized InlineOrder");
138
139 // Populate the initial list of calls in this module.
140 SetVector<std::pair<CallBase *, Function *>> ICPCandidates;
141 for (Function &F : M) {
142 if (F.isDeclaration())
143 continue;
144 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
145 for (Instruction &I : instructions(F)) {
146 if (auto *CB = dyn_cast<CallBase>(Val: &I)) {
147 if (Function *Callee = CB->getCalledFunction()) {
148 if (!Callee->isDeclaration())
149 Calls->push(Elt: CB);
150 else if (!isa<IntrinsicInst>(Val: I)) {
151 using namespace ore;
152 setInlineRemark(CB&: *CB, Message: "unavailable definition");
153 ORE.emit(RemarkBuilder: [&]() {
154 return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
155 << NV("Callee", Callee) << " will not be inlined into "
156 << NV("Caller", CB->getCaller())
157 << " because its definition is unavailable"
158 << setIsVerbose();
159 });
160 }
161 } else if (CtxProfPromoteAlwaysInline &&
162 CtxProf.isInSpecializedModule() && CB->isIndirectCall()) {
163 CtxProfAnalysis::collectIndirectCallPromotionList(IC&: *CB, Profile&: CtxProf,
164 Candidates&: ICPCandidates);
165 }
166 }
167 }
168 }
169 for (auto &[CB, Target] : ICPCandidates) {
170 if (auto *DirectCB = promoteCallWithIfThenElse(CB&: *CB, Callee&: *Target, CtxProf))
171 Calls->push(Elt: DirectCB);
172 }
173 if (Calls->empty())
174 return PreservedAnalyses::all();
175
176 // Track the dead functions to delete once finished with inlining calls. We
177 // defer deleting these to make it easier to handle the call graph updates.
178 SmallVector<Function *, 4> DeadFunctions;
179
180 // Loop forward over all of the calls.
181 while (!Calls->empty()) {
182 CallBase *CB = Calls->pop();
183 Function &F = *CB->getCaller();
184 Function &Callee = *CB->getCalledFunction();
185
186 LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
187 << " Function size: " << F.getInstructionCount()
188 << "\n");
189 (void)F;
190
191 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
192 return FAM.getResult<AssumptionAnalysis>(IR&: F);
193 };
194
195 auto Advice = Advisor.getAdvice(CB&: *CB, /*OnlyMandatory*/ MandatoryOnly: false);
196 // Check whether we want to inline this callsite.
197 if (!Advice->isInliningRecommended()) {
198 Advice->recordUnattemptedInlining();
199 continue;
200 }
201
202 // Setup the data structure used to plumb customization into the
203 // `InlineFunction` routine.
204 InlineFunctionInfo IFI(
205 GetAssumptionCache, PSI,
206 &FAM.getResult<BlockFrequencyAnalysis>(IR&: *(CB->getCaller())),
207 &FAM.getResult<BlockFrequencyAnalysis>(IR&: Callee));
208
209 InlineResult IR =
210 InlineFunction(CB&: *CB, IFI, CtxProf, /*MergeAttributes=*/true,
211 CalleeAAR: &FAM.getResult<AAManager>(IR&: *CB->getCaller()),
212 /*InsertLifetime=*/true,
213 /*TrackInlineHistory=*/true);
214 if (!IR.isSuccess()) {
215 Advice->recordUnsuccessfulInlining(Result: IR);
216 continue;
217 }
218
219 Changed = true;
220 ++NumInlined;
221
222 LLVM_DEBUG(dbgs() << " Size after inlining: " << F.getInstructionCount()
223 << "\n");
224
225 // Add any new callsites to defined functions to the worklist.
226 if (!IFI.InlinedCallSites.empty()) {
227 for (CallBase *ICB : reverse(C&: IFI.InlinedCallSites)) {
228 Function *NewCallee = ICB->getCalledFunction();
229 if (!NewCallee) {
230 // Try to promote an indirect (virtual) call without waiting for
231 // the post-inline cleanup and the next DevirtSCCRepeatedPass
232 // iteration because the next iteration may not happen and we may
233 // miss inlining it.
234 // FIXME: enable for ctxprof.
235 if (CtxProf.isInSpecializedModule())
236 if (tryPromoteCall(CB&: *ICB))
237 NewCallee = ICB->getCalledFunction();
238 }
239 if (NewCallee)
240 if (!NewCallee->isDeclaration())
241 Calls->push(Elt: ICB);
242 }
243 }
244
245 // For local functions, check whether this makes the callee trivially
246 // dead. In that case, we can drop the body of the function eagerly
247 // which may reduce the number of callers of other functions to one,
248 // changing inline cost thresholds.
249 bool CalleeWasDeleted = false;
250 if (Callee.hasLocalLinkage()) {
251 // To check this we also need to nuke any dead constant uses (perhaps
252 // made dead by this operation on other functions).
253 Callee.removeDeadConstantUsers();
254 // if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
255 if (Callee.use_empty() && !isKnownLibFunction(F&: Callee, TLI&: GetTLI(Callee))) {
256 Calls->erase_if(
257 Pred: [&](const CallBase *CB) { return CB->getCaller() == &Callee; });
258
259 // Report inlining decision BEFORE deleting function contents, so we
260 // can still access e.g. the DebugLoc
261 Advice->recordInliningWithCalleeDeleted();
262 // Clear the body and queue the function itself for deletion when we
263 // finish inlining.
264 // Note that after this point, it is an error to do anything other
265 // than use the callee's address or delete it.
266 Callee.dropAllReferences();
267 assert(!is_contained(DeadFunctions, &Callee) &&
268 "Cannot put cause a function to become dead twice!");
269 DeadFunctions.push_back(Elt: &Callee);
270 CalleeWasDeleted = true;
271 }
272 }
273 if (!CalleeWasDeleted)
274 Advice->recordInlining();
275 }
276
277 // Now that we've finished inlining all of the calls across this module,
278 // delete all of the trivially dead functions.
279 //
280 // Note that this walks a pointer set which has non-deterministic order but
281 // that is OK as all we do is delete things and add pointers to unordered
282 // sets.
283 for (Function *DeadF : DeadFunctions) {
284 // Clear out any cached analyses.
285 FAM.clear(IR&: *DeadF, Name: DeadF->getName());
286
287 // And delete the actual function from the module.
288 M.getFunctionList().erase(IT: DeadF);
289
290 ++NumDeleted;
291 }
292
293 if (!Changed)
294 return PreservedAnalyses::all();
295
296 return PreservedAnalyses::none();
297}
298