1 | //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file provides interfaces used to build and manipulate a call graph, |
11 | /// which is a very useful tool for interprocedural optimization. |
12 | /// |
13 | /// Every function in a module is represented as a node in the call graph. The |
14 | /// callgraph node keeps track of which functions are called by the function |
15 | /// corresponding to the node. |
16 | /// |
17 | /// A call graph may contain nodes where the function that they correspond to |
18 | /// is null. These 'external' nodes are used to represent control flow that is |
19 | /// not represented (or analyzable) in the module. In particular, this |
20 | /// analysis builds one external node such that: |
21 | /// 1. All functions in the module without internal linkage will have edges |
22 | /// from this external node, indicating that they could be called by |
23 | /// functions outside of the module. |
///  2. All functions whose address is used for something more than a direct
///     call, for example being stored into a memory location, will also have
///     an edge from this external node. Since they may be called by an
///     unknown caller later, they must be tracked as such.
28 | /// |
29 | /// There is a second external node added for calls that leave this module. |
30 | /// Functions have a call edge to the external node iff: |
31 | /// 1. The function is external, reflecting the fact that they could call |
32 | /// anything without internal linkage or that has its address taken. |
33 | /// 2. The function contains an indirect function call. |
34 | /// |
35 | /// As an extension in the future, there may be multiple nodes with a null |
36 | /// function. These will be used when we can prove (through pointer analysis) |
37 | /// that an indirect call site can call only a specific set of functions. |
38 | /// |
39 | /// Because of these properties, the CallGraph captures a conservative superset |
40 | /// of all of the caller-callee relationships, which is useful for |
41 | /// transformations. |
42 | /// |
43 | //===----------------------------------------------------------------------===// |
44 | |
45 | #ifndef LLVM_ANALYSIS_CALLGRAPH_H |
46 | #define LLVM_ANALYSIS_CALLGRAPH_H |
47 | |
48 | #include "llvm/IR/InstrTypes.h" |
49 | #include "llvm/IR/PassManager.h" |
50 | #include "llvm/IR/ValueHandle.h" |
51 | #include "llvm/Pass.h" |
52 | #include "llvm/Support/Compiler.h" |
53 | #include <cassert> |
54 | #include <map> |
55 | #include <memory> |
56 | #include <utility> |
57 | #include <vector> |
58 | |
59 | namespace llvm { |
60 | |
// Forward declarations. Function, Module and raw_ostream are only used
// through pointers and references in this header; CallGraphNode is defined
// further down, and GraphTraits is specialized for the call graph types.
class CallGraphNode;
class Function;
class Module;
class raw_ostream;
template <typename GraphType> struct GraphTraits;
66 | |
67 | /// The basic data container for the call graph of a \c Module of IR. |
68 | /// |
69 | /// This class exposes both the interface to the call graph for a module of IR. |
70 | /// |
71 | /// The core call graph itself can also be updated to reflect changes to the IR. |
72 | class CallGraph { |
73 | Module &M; |
74 | |
75 | using FunctionMapTy = |
76 | std::map<const Function *, std::unique_ptr<CallGraphNode>>; |
77 | |
78 | /// A map from \c Function* to \c CallGraphNode*. |
79 | FunctionMapTy FunctionMap; |
80 | |
81 | /// This node has edges to all external functions and those internal |
82 | /// functions that have their address taken. |
83 | CallGraphNode *ExternalCallingNode; |
84 | |
85 | /// This node has edges to it from all functions making indirect calls |
86 | /// or calling an external function. |
87 | std::unique_ptr<CallGraphNode> CallsExternalNode; |
88 | |
89 | public: |
90 | LLVM_ABI explicit CallGraph(Module &M); |
91 | LLVM_ABI CallGraph(CallGraph &&Arg); |
92 | LLVM_ABI ~CallGraph(); |
93 | |
94 | LLVM_ABI void print(raw_ostream &OS) const; |
95 | LLVM_ABI void dump() const; |
96 | |
97 | using iterator = FunctionMapTy::iterator; |
98 | using const_iterator = FunctionMapTy::const_iterator; |
99 | |
100 | /// Returns the module the call graph corresponds to. |
101 | Module &getModule() const { return M; } |
102 | |
103 | LLVM_ABI bool invalidate(Module &, const PreservedAnalyses &PA, |
104 | ModuleAnalysisManager::Invalidator &); |
105 | |
106 | inline iterator begin() { return FunctionMap.begin(); } |
107 | inline iterator end() { return FunctionMap.end(); } |
108 | inline const_iterator begin() const { return FunctionMap.begin(); } |
109 | inline const_iterator end() const { return FunctionMap.end(); } |
110 | |
111 | /// Returns the call graph node for the provided function. |
112 | inline const CallGraphNode *operator[](const Function *F) const { |
113 | const_iterator I = FunctionMap.find(x: F); |
114 | assert(I != FunctionMap.end() && "Function not in callgraph!" ); |
115 | return I->second.get(); |
116 | } |
117 | |
118 | /// Returns the call graph node for the provided function. |
119 | inline CallGraphNode *operator[](const Function *F) { |
120 | const_iterator I = FunctionMap.find(x: F); |
121 | assert(I != FunctionMap.end() && "Function not in callgraph!" ); |
122 | return I->second.get(); |
123 | } |
124 | |
125 | /// Returns the \c CallGraphNode which is used to represent |
126 | /// undetermined calls into the callgraph. |
127 | CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } |
128 | |
129 | CallGraphNode *getCallsExternalNode() const { |
130 | return CallsExternalNode.get(); |
131 | } |
132 | |
133 | //===--------------------------------------------------------------------- |
134 | // Functions to keep a call graph up to date with a function that has been |
135 | // modified. |
136 | // |
137 | |
138 | /// Unlink the function from this module, returning it. |
139 | /// |
140 | /// Because this removes the function from the module, the call graph node is |
141 | /// destroyed. This is only valid if the function does not call any other |
142 | /// functions (ie, there are no edges in it's CGN). The easiest way to do |
143 | /// this is to dropAllReferences before calling this. |
144 | LLVM_ABI Function *removeFunctionFromModule(CallGraphNode *CGN); |
145 | |
146 | /// Similar to operator[], but this will insert a new CallGraphNode for |
147 | /// \c F if one does not already exist. |
148 | LLVM_ABI CallGraphNode *getOrInsertFunction(const Function *F); |
149 | |
150 | /// Populate \p CGN based on the calls inside the associated function. |
151 | LLVM_ABI void populateCallGraphNode(CallGraphNode *CGN); |
152 | |
153 | /// Add a function to the call graph, and link the node to all of the |
154 | /// functions that it calls. |
155 | LLVM_ABI void addToCallGraph(Function *F); |
156 | }; |
157 | |
158 | /// A node in the call graph for a module. |
159 | /// |
160 | /// Typically represents a function in the call graph. There are also special |
161 | /// "null" nodes used to represent theoretical entries in the call graph. |
162 | class CallGraphNode { |
163 | public: |
164 | /// A pair of the calling instruction (a call or invoke) |
165 | /// and the call graph node being called. |
166 | /// Call graph node may have two types of call records which represent an edge |
167 | /// in the call graph - reference or a call edge. Reference edges are not |
168 | /// associated with any call instruction and are created with the first field |
169 | /// set to `None`, while real call edges have instruction address in this |
170 | /// field. Therefore, all real call edges are expected to have a value in the |
171 | /// first field and it is not supposed to be `nullptr`. |
172 | /// Reference edges, for example, are used for connecting broker function |
173 | /// caller to the callback function for callback call sites. |
174 | using CallRecord = std::pair<std::optional<WeakTrackingVH>, CallGraphNode *>; |
175 | |
176 | public: |
177 | using CalledFunctionsVector = std::vector<CallRecord>; |
178 | |
179 | /// Creates a node for the specified function. |
180 | inline CallGraphNode(CallGraph *CG, Function *F) : CG(CG), F(F) {} |
181 | |
182 | CallGraphNode(const CallGraphNode &) = delete; |
183 | CallGraphNode &operator=(const CallGraphNode &) = delete; |
184 | |
185 | ~CallGraphNode() { |
186 | assert(NumReferences == 0 && "Node deleted while references remain" ); |
187 | } |
188 | |
189 | using iterator = std::vector<CallRecord>::iterator; |
190 | using const_iterator = std::vector<CallRecord>::const_iterator; |
191 | |
192 | /// Returns the function that this call graph node represents. |
193 | Function *getFunction() const { return F; } |
194 | |
195 | inline iterator begin() { return CalledFunctions.begin(); } |
196 | inline iterator end() { return CalledFunctions.end(); } |
197 | inline const_iterator begin() const { return CalledFunctions.begin(); } |
198 | inline const_iterator end() const { return CalledFunctions.end(); } |
199 | inline bool empty() const { return CalledFunctions.empty(); } |
200 | inline unsigned size() const { return (unsigned)CalledFunctions.size(); } |
201 | |
202 | /// Returns the number of other CallGraphNodes in this CallGraph that |
203 | /// reference this node in their callee list. |
204 | unsigned getNumReferences() const { return NumReferences; } |
205 | |
206 | /// Returns the i'th called function. |
207 | CallGraphNode *operator[](unsigned i) const { |
208 | assert(i < CalledFunctions.size() && "Invalid index" ); |
209 | return CalledFunctions[i].second; |
210 | } |
211 | |
212 | /// Print out this call graph node. |
213 | LLVM_ABI void dump() const; |
214 | LLVM_ABI void print(raw_ostream &OS) const; |
215 | |
216 | //===--------------------------------------------------------------------- |
217 | // Methods to keep a call graph up to date with a function that has been |
218 | // modified |
219 | // |
220 | |
221 | /// Removes all edges from this CallGraphNode to any functions it |
222 | /// calls. |
223 | void removeAllCalledFunctions() { |
224 | while (!CalledFunctions.empty()) { |
225 | CalledFunctions.back().second->DropRef(); |
226 | CalledFunctions.pop_back(); |
227 | } |
228 | } |
229 | |
230 | /// Moves all the callee information from N to this node. |
231 | void stealCalledFunctionsFrom(CallGraphNode *N) { |
232 | assert(CalledFunctions.empty() && |
233 | "Cannot steal callsite information if I already have some" ); |
234 | std::swap(x&: CalledFunctions, y&: N->CalledFunctions); |
235 | } |
236 | |
237 | /// Adds a function to the list of functions called by this one. |
238 | void addCalledFunction(CallBase *Call, CallGraphNode *M) { |
239 | CalledFunctions.emplace_back(args: Call ? std::optional<WeakTrackingVH>(Call) |
240 | : std::optional<WeakTrackingVH>(), |
241 | args&: M); |
242 | M->AddRef(); |
243 | } |
244 | |
245 | void removeCallEdge(iterator I) { |
246 | I->second->DropRef(); |
247 | *I = CalledFunctions.back(); |
248 | CalledFunctions.pop_back(); |
249 | } |
250 | |
251 | /// Removes one edge associated with a null callsite from this node to |
252 | /// the specified callee function. |
253 | LLVM_ABI void removeOneAbstractEdgeTo(CallGraphNode *Callee); |
254 | |
255 | /// Replaces the edge in the node for the specified call site with a |
256 | /// new one. |
257 | /// |
258 | /// Note that this method takes linear time, so it should be used sparingly. |
259 | LLVM_ABI void replaceCallEdge(CallBase &Call, CallBase &NewCall, |
260 | CallGraphNode *NewNode); |
261 | |
262 | private: |
263 | friend class CallGraph; |
264 | |
265 | CallGraph *CG; |
266 | Function *F; |
267 | |
268 | std::vector<CallRecord> CalledFunctions; |
269 | |
270 | /// The number of times that this CallGraphNode occurs in the |
271 | /// CalledFunctions array of this or other CallGraphNodes. |
272 | unsigned NumReferences = 0; |
273 | |
274 | void DropRef() { --NumReferences; } |
275 | void AddRef() { ++NumReferences; } |
276 | |
277 | /// A special function that should only be used by the CallGraph class. |
278 | void allReferencesDropped() { NumReferences = 0; } |
279 | }; |
280 | |
281 | /// An analysis pass to compute the \c CallGraph for a \c Module. |
282 | /// |
283 | /// This class implements the concept of an analysis pass used by the \c |
284 | /// ModuleAnalysisManager to run an analysis over a module and cache the |
285 | /// resulting data. |
286 | class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { |
287 | friend AnalysisInfoMixin<CallGraphAnalysis>; |
288 | |
289 | LLVM_ABI static AnalysisKey Key; |
290 | |
291 | public: |
292 | /// A formulaic type to inform clients of the result type. |
293 | using Result = CallGraph; |
294 | |
295 | /// Compute the \c CallGraph for the module \c M. |
296 | /// |
297 | /// The real work here is done in the \c CallGraph constructor. |
298 | CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } |
299 | }; |
300 | |
301 | /// Printer pass for the \c CallGraphAnalysis results. |
302 | class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { |
303 | raw_ostream &OS; |
304 | |
305 | public: |
306 | explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} |
307 | |
308 | LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
309 | |
310 | static bool isRequired() { return true; } |
311 | }; |
312 | |
313 | /// Printer pass for the summarized \c CallGraphAnalysis results. |
314 | class CallGraphSCCsPrinterPass |
315 | : public PassInfoMixin<CallGraphSCCsPrinterPass> { |
316 | raw_ostream &OS; |
317 | |
318 | public: |
319 | explicit CallGraphSCCsPrinterPass(raw_ostream &OS) : OS(OS) {} |
320 | |
321 | LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
322 | |
323 | static bool isRequired() { return true; } |
324 | }; |
325 | |
326 | /// The \c ModulePass which wraps up a \c CallGraph and the logic to |
327 | /// build it. |
328 | /// |
329 | /// This class exposes both the interface to the call graph container and the |
330 | /// module pass which runs over a module of IR and produces the call graph. The |
331 | /// call graph interface is entirelly a wrapper around a \c CallGraph object |
332 | /// which is stored internally for each module. |
333 | class LLVM_ABI CallGraphWrapperPass : public ModulePass { |
334 | std::unique_ptr<CallGraph> G; |
335 | |
336 | public: |
337 | static char ID; // Class identification, replacement for typeinfo |
338 | |
339 | CallGraphWrapperPass(); |
340 | ~CallGraphWrapperPass() override; |
341 | |
342 | /// The internal \c CallGraph around which the rest of this interface |
343 | /// is wrapped. |
344 | const CallGraph &getCallGraph() const { return *G; } |
345 | CallGraph &getCallGraph() { return *G; } |
346 | |
347 | using iterator = CallGraph::iterator; |
348 | using const_iterator = CallGraph::const_iterator; |
349 | |
350 | /// Returns the module the call graph corresponds to. |
351 | Module &getModule() const { return G->getModule(); } |
352 | |
353 | inline iterator begin() { return G->begin(); } |
354 | inline iterator end() { return G->end(); } |
355 | inline const_iterator begin() const { return G->begin(); } |
356 | inline const_iterator end() const { return G->end(); } |
357 | |
358 | /// Returns the call graph node for the provided function. |
359 | inline const CallGraphNode *operator[](const Function *F) const { |
360 | return (*G)[F]; |
361 | } |
362 | |
363 | /// Returns the call graph node for the provided function. |
364 | inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } |
365 | |
366 | /// Returns the \c CallGraphNode which is used to represent |
367 | /// undetermined calls into the callgraph. |
368 | CallGraphNode *getExternalCallingNode() const { |
369 | return G->getExternalCallingNode(); |
370 | } |
371 | |
372 | CallGraphNode *getCallsExternalNode() const { |
373 | return G->getCallsExternalNode(); |
374 | } |
375 | |
376 | //===--------------------------------------------------------------------- |
377 | // Functions to keep a call graph up to date with a function that has been |
378 | // modified. |
379 | // |
380 | |
381 | /// Unlink the function from this module, returning it. |
382 | /// |
383 | /// Because this removes the function from the module, the call graph node is |
384 | /// destroyed. This is only valid if the function does not call any other |
385 | /// functions (ie, there are no edges in it's CGN). The easiest way to do |
386 | /// this is to dropAllReferences before calling this. |
387 | Function *removeFunctionFromModule(CallGraphNode *CGN) { |
388 | return G->removeFunctionFromModule(CGN); |
389 | } |
390 | |
391 | /// Similar to operator[], but this will insert a new CallGraphNode for |
392 | /// \c F if one does not already exist. |
393 | CallGraphNode *getOrInsertFunction(const Function *F) { |
394 | return G->getOrInsertFunction(F); |
395 | } |
396 | |
397 | //===--------------------------------------------------------------------- |
398 | // Implementation of the ModulePass interface needed here. |
399 | // |
400 | |
401 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
402 | bool runOnModule(Module &M) override; |
403 | void releaseMemory() override; |
404 | |
405 | void print(raw_ostream &o, const Module *) const override; |
406 | void dump() const; |
407 | }; |
408 | |
409 | //===----------------------------------------------------------------------===// |
410 | // GraphTraits specializations for call graphs so that they can be treated as |
411 | // graphs by the generic graph algorithms. |
412 | // |
413 | |
414 | // Provide graph traits for traversing call graphs using standard graph |
415 | // traversals. |
416 | template <> struct GraphTraits<CallGraphNode *> { |
417 | using NodeRef = CallGraphNode *; |
418 | using CGNPairTy = CallGraphNode::CallRecord; |
419 | |
420 | static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } |
421 | static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } |
422 | |
423 | using ChildIteratorType = |
424 | mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; |
425 | |
426 | static ChildIteratorType child_begin(NodeRef N) { |
427 | return ChildIteratorType(N->begin(), &CGNGetValue); |
428 | } |
429 | |
430 | static ChildIteratorType child_end(NodeRef N) { |
431 | return ChildIteratorType(N->end(), &CGNGetValue); |
432 | } |
433 | }; |
434 | |
435 | template <> struct GraphTraits<const CallGraphNode *> { |
436 | using NodeRef = const CallGraphNode *; |
437 | using CGNPairTy = CallGraphNode::CallRecord; |
438 | using EdgeRef = const CallGraphNode::CallRecord &; |
439 | |
440 | static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } |
441 | static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } |
442 | |
443 | using ChildIteratorType = |
444 | mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; |
445 | using ChildEdgeIteratorType = CallGraphNode::const_iterator; |
446 | |
447 | static ChildIteratorType child_begin(NodeRef N) { |
448 | return ChildIteratorType(N->begin(), &CGNGetValue); |
449 | } |
450 | |
451 | static ChildIteratorType child_end(NodeRef N) { |
452 | return ChildIteratorType(N->end(), &CGNGetValue); |
453 | } |
454 | |
455 | static ChildEdgeIteratorType child_edge_begin(NodeRef N) { |
456 | return N->begin(); |
457 | } |
458 | static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } |
459 | |
460 | static NodeRef edge_dest(EdgeRef E) { return E.second; } |
461 | }; |
462 | |
463 | template <> |
464 | struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { |
465 | using PairTy = |
466 | std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; |
467 | |
468 | static NodeRef getEntryNode(CallGraph *CGN) { |
469 | return CGN->getExternalCallingNode(); // Start at the external node! |
470 | } |
471 | |
472 | static CallGraphNode *CGGetValuePtr(const PairTy &P) { |
473 | return P.second.get(); |
474 | } |
475 | |
476 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph |
477 | using nodes_iterator = |
478 | mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; |
479 | |
480 | static nodes_iterator nodes_begin(CallGraph *CG) { |
481 | return nodes_iterator(CG->begin(), &CGGetValuePtr); |
482 | } |
483 | |
484 | static nodes_iterator nodes_end(CallGraph *CG) { |
485 | return nodes_iterator(CG->end(), &CGGetValuePtr); |
486 | } |
487 | }; |
488 | |
489 | template <> |
490 | struct GraphTraits<const CallGraph *> : public GraphTraits< |
491 | const CallGraphNode *> { |
492 | using PairTy = |
493 | std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; |
494 | |
495 | static NodeRef getEntryNode(const CallGraph *CGN) { |
496 | return CGN->getExternalCallingNode(); // Start at the external node! |
497 | } |
498 | |
499 | static const CallGraphNode *CGGetValuePtr(const PairTy &P) { |
500 | return P.second.get(); |
501 | } |
502 | |
503 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph |
504 | using nodes_iterator = |
505 | mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; |
506 | |
507 | static nodes_iterator nodes_begin(const CallGraph *CG) { |
508 | return nodes_iterator(CG->begin(), &CGGetValuePtr); |
509 | } |
510 | |
511 | static nodes_iterator nodes_end(const CallGraph *CG) { |
512 | return nodes_iterator(CG->end(), &CGGetValuePtr); |
513 | } |
514 | }; |
515 | |
516 | } // end namespace llvm |
517 | |
518 | #endif // LLVM_ANALYSIS_CALLGRAPH_H |
519 | |