1 | //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file provides interfaces used to build and manipulate a call graph, |
11 | /// which is a very useful tool for interprocedural optimization. |
12 | /// |
13 | /// Every function in a module is represented as a node in the call graph. The |
14 | /// callgraph node keeps track of which functions are called by the function |
15 | /// corresponding to the node. |
16 | /// |
17 | /// A call graph may contain nodes where the function that they correspond to |
18 | /// is null. These 'external' nodes are used to represent control flow that is |
19 | /// not represented (or analyzable) in the module. In particular, this |
20 | /// analysis builds one external node such that: |
21 | /// 1. All functions in the module without internal linkage will have edges |
22 | /// from this external node, indicating that they could be called by |
23 | /// functions outside of the module. |
24 | /// 2. All functions whose address is used for something more than a direct |
25 | /// call, for example being stored into a memory location will also have |
26 | /// an edge from this external node. Since they may be called by an |
27 | /// unknown caller later, they must be tracked as such. |
28 | /// |
29 | /// There is a second external node added for calls that leave this module. |
30 | /// Functions have a call edge to the external node iff: |
31 | /// 1. The function is external, reflecting the fact that they could call |
32 | /// anything without internal linkage or that has its address taken. |
33 | /// 2. The function contains an indirect function call. |
34 | /// |
35 | /// As an extension in the future, there may be multiple nodes with a null |
36 | /// function. These will be used when we can prove (through pointer analysis) |
37 | /// that an indirect call site can call only a specific set of functions. |
38 | /// |
39 | /// Because of these properties, the CallGraph captures a conservative superset |
40 | /// of all of the caller-callee relationships, which is useful for |
41 | /// transformations. |
42 | /// |
43 | //===----------------------------------------------------------------------===// |
44 | |
45 | #ifndef LLVM_ANALYSIS_CALLGRAPH_H |
46 | #define LLVM_ANALYSIS_CALLGRAPH_H |
47 | |
48 | #include "llvm/IR/InstrTypes.h" |
49 | #include "llvm/IR/Intrinsics.h" |
50 | #include "llvm/IR/PassManager.h" |
51 | #include "llvm/IR/ValueHandle.h" |
52 | #include "llvm/Pass.h" |
53 | #include <cassert> |
54 | #include <map> |
55 | #include <memory> |
56 | #include <utility> |
57 | #include <vector> |
58 | |
59 | namespace llvm { |
60 | |
61 | template <class GraphType> struct GraphTraits; |
62 | class CallGraphNode; |
63 | class Function; |
64 | class Module; |
65 | class raw_ostream; |
66 | |
67 | /// The basic data container for the call graph of a \c Module of IR. |
68 | /// |
69 | /// This class exposes both the interface to the call graph for a module of IR. |
70 | /// |
71 | /// The core call graph itself can also be updated to reflect changes to the IR. |
72 | class CallGraph { |
73 | Module &M; |
74 | |
75 | using FunctionMapTy = |
76 | std::map<const Function *, std::unique_ptr<CallGraphNode>>; |
77 | |
78 | /// A map from \c Function* to \c CallGraphNode*. |
79 | FunctionMapTy FunctionMap; |
80 | |
81 | /// This node has edges to all external functions and those internal |
82 | /// functions that have their address taken. |
83 | CallGraphNode *ExternalCallingNode; |
84 | |
85 | /// This node has edges to it from all functions making indirect calls |
86 | /// or calling an external function. |
87 | std::unique_ptr<CallGraphNode> CallsExternalNode; |
88 | |
89 | public: |
90 | explicit CallGraph(Module &M); |
91 | CallGraph(CallGraph &&Arg); |
92 | ~CallGraph(); |
93 | |
94 | void print(raw_ostream &OS) const; |
95 | void dump() const; |
96 | |
97 | using iterator = FunctionMapTy::iterator; |
98 | using const_iterator = FunctionMapTy::const_iterator; |
99 | |
100 | /// Returns the module the call graph corresponds to. |
101 | Module &getModule() const { return M; } |
102 | |
103 | bool invalidate(Module &, const PreservedAnalyses &PA, |
104 | ModuleAnalysisManager::Invalidator &); |
105 | |
106 | inline iterator begin() { return FunctionMap.begin(); } |
107 | inline iterator end() { return FunctionMap.end(); } |
108 | inline const_iterator begin() const { return FunctionMap.begin(); } |
109 | inline const_iterator end() const { return FunctionMap.end(); } |
110 | |
111 | /// Returns the call graph node for the provided function. |
112 | inline const CallGraphNode *operator[](const Function *F) const { |
113 | const_iterator I = FunctionMap.find(x: F); |
114 | assert(I != FunctionMap.end() && "Function not in callgraph!" ); |
115 | return I->second.get(); |
116 | } |
117 | |
118 | /// Returns the call graph node for the provided function. |
119 | inline CallGraphNode *operator[](const Function *F) { |
120 | const_iterator I = FunctionMap.find(x: F); |
121 | assert(I != FunctionMap.end() && "Function not in callgraph!" ); |
122 | return I->second.get(); |
123 | } |
124 | |
125 | /// Returns the \c CallGraphNode which is used to represent |
126 | /// undetermined calls into the callgraph. |
127 | CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } |
128 | |
129 | CallGraphNode *getCallsExternalNode() const { |
130 | return CallsExternalNode.get(); |
131 | } |
132 | |
133 | /// Old node has been deleted, and New is to be used in its place, update the |
134 | /// ExternalCallingNode. |
135 | void ReplaceExternalCallEdge(CallGraphNode *Old, CallGraphNode *New); |
136 | |
137 | //===--------------------------------------------------------------------- |
138 | // Functions to keep a call graph up to date with a function that has been |
139 | // modified. |
140 | // |
141 | |
142 | /// Unlink the function from this module, returning it. |
143 | /// |
144 | /// Because this removes the function from the module, the call graph node is |
145 | /// destroyed. This is only valid if the function does not call any other |
146 | /// functions (ie, there are no edges in it's CGN). The easiest way to do |
147 | /// this is to dropAllReferences before calling this. |
148 | Function *removeFunctionFromModule(CallGraphNode *CGN); |
149 | |
150 | /// Similar to operator[], but this will insert a new CallGraphNode for |
151 | /// \c F if one does not already exist. |
152 | CallGraphNode *getOrInsertFunction(const Function *F); |
153 | |
154 | /// Populate \p CGN based on the calls inside the associated function. |
155 | void populateCallGraphNode(CallGraphNode *CGN); |
156 | |
157 | /// Add a function to the call graph, and link the node to all of the |
158 | /// functions that it calls. |
159 | void addToCallGraph(Function *F); |
160 | }; |
161 | |
162 | /// A node in the call graph for a module. |
163 | /// |
164 | /// Typically represents a function in the call graph. There are also special |
165 | /// "null" nodes used to represent theoretical entries in the call graph. |
166 | class CallGraphNode { |
167 | public: |
168 | /// A pair of the calling instruction (a call or invoke) |
169 | /// and the call graph node being called. |
170 | /// Call graph node may have two types of call records which represent an edge |
171 | /// in the call graph - reference or a call edge. Reference edges are not |
172 | /// associated with any call instruction and are created with the first field |
173 | /// set to `None`, while real call edges have instruction address in this |
174 | /// field. Therefore, all real call edges are expected to have a value in the |
175 | /// first field and it is not supposed to be `nullptr`. |
176 | /// Reference edges, for example, are used for connecting broker function |
177 | /// caller to the callback function for callback call sites. |
178 | using CallRecord = std::pair<std::optional<WeakTrackingVH>, CallGraphNode *>; |
179 | |
180 | public: |
181 | using CalledFunctionsVector = std::vector<CallRecord>; |
182 | |
183 | /// Creates a node for the specified function. |
184 | inline CallGraphNode(CallGraph *CG, Function *F) : CG(CG), F(F) {} |
185 | |
186 | CallGraphNode(const CallGraphNode &) = delete; |
187 | CallGraphNode &operator=(const CallGraphNode &) = delete; |
188 | |
189 | ~CallGraphNode() { |
190 | assert(NumReferences == 0 && "Node deleted while references remain" ); |
191 | } |
192 | |
193 | using iterator = std::vector<CallRecord>::iterator; |
194 | using const_iterator = std::vector<CallRecord>::const_iterator; |
195 | |
196 | /// Returns the function that this call graph node represents. |
197 | Function *getFunction() const { return F; } |
198 | |
199 | inline iterator begin() { return CalledFunctions.begin(); } |
200 | inline iterator end() { return CalledFunctions.end(); } |
201 | inline const_iterator begin() const { return CalledFunctions.begin(); } |
202 | inline const_iterator end() const { return CalledFunctions.end(); } |
203 | inline bool empty() const { return CalledFunctions.empty(); } |
204 | inline unsigned size() const { return (unsigned)CalledFunctions.size(); } |
205 | |
206 | /// Returns the number of other CallGraphNodes in this CallGraph that |
207 | /// reference this node in their callee list. |
208 | unsigned getNumReferences() const { return NumReferences; } |
209 | |
210 | /// Returns the i'th called function. |
211 | CallGraphNode *operator[](unsigned i) const { |
212 | assert(i < CalledFunctions.size() && "Invalid index" ); |
213 | return CalledFunctions[i].second; |
214 | } |
215 | |
216 | /// Print out this call graph node. |
217 | void dump() const; |
218 | void print(raw_ostream &OS) const; |
219 | |
220 | //===--------------------------------------------------------------------- |
221 | // Methods to keep a call graph up to date with a function that has been |
222 | // modified |
223 | // |
224 | |
225 | /// Removes all edges from this CallGraphNode to any functions it |
226 | /// calls. |
227 | void removeAllCalledFunctions() { |
228 | while (!CalledFunctions.empty()) { |
229 | CalledFunctions.back().second->DropRef(); |
230 | CalledFunctions.pop_back(); |
231 | } |
232 | } |
233 | |
234 | /// Moves all the callee information from N to this node. |
235 | void stealCalledFunctionsFrom(CallGraphNode *N) { |
236 | assert(CalledFunctions.empty() && |
237 | "Cannot steal callsite information if I already have some" ); |
238 | std::swap(x&: CalledFunctions, y&: N->CalledFunctions); |
239 | } |
240 | |
241 | /// Adds a function to the list of functions called by this one. |
242 | void addCalledFunction(CallBase *Call, CallGraphNode *M) { |
243 | CalledFunctions.emplace_back(args: Call ? std::optional<WeakTrackingVH>(Call) |
244 | : std::optional<WeakTrackingVH>(), |
245 | args&: M); |
246 | M->AddRef(); |
247 | } |
248 | |
249 | void removeCallEdge(iterator I) { |
250 | I->second->DropRef(); |
251 | *I = CalledFunctions.back(); |
252 | CalledFunctions.pop_back(); |
253 | } |
254 | |
255 | /// Removes the edge in the node for the specified call site. |
256 | /// |
257 | /// Note that this method takes linear time, so it should be used sparingly. |
258 | void removeCallEdgeFor(CallBase &Call); |
259 | |
260 | /// Removes all call edges from this node to the specified callee |
261 | /// function. |
262 | /// |
263 | /// This takes more time to execute than removeCallEdgeTo, so it should not |
264 | /// be used unless necessary. |
265 | void removeAnyCallEdgeTo(CallGraphNode *Callee); |
266 | |
267 | /// Removes one edge associated with a null callsite from this node to |
268 | /// the specified callee function. |
269 | void removeOneAbstractEdgeTo(CallGraphNode *Callee); |
270 | |
271 | /// Replaces the edge in the node for the specified call site with a |
272 | /// new one. |
273 | /// |
274 | /// Note that this method takes linear time, so it should be used sparingly. |
275 | void replaceCallEdge(CallBase &Call, CallBase &NewCall, |
276 | CallGraphNode *NewNode); |
277 | |
278 | private: |
279 | friend class CallGraph; |
280 | |
281 | CallGraph *CG; |
282 | Function *F; |
283 | |
284 | std::vector<CallRecord> CalledFunctions; |
285 | |
286 | /// The number of times that this CallGraphNode occurs in the |
287 | /// CalledFunctions array of this or other CallGraphNodes. |
288 | unsigned NumReferences = 0; |
289 | |
290 | void DropRef() { --NumReferences; } |
291 | void AddRef() { ++NumReferences; } |
292 | |
293 | /// A special function that should only be used by the CallGraph class. |
294 | void allReferencesDropped() { NumReferences = 0; } |
295 | }; |
296 | |
297 | /// An analysis pass to compute the \c CallGraph for a \c Module. |
298 | /// |
299 | /// This class implements the concept of an analysis pass used by the \c |
300 | /// ModuleAnalysisManager to run an analysis over a module and cache the |
301 | /// resulting data. |
302 | class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { |
303 | friend AnalysisInfoMixin<CallGraphAnalysis>; |
304 | |
305 | static AnalysisKey Key; |
306 | |
307 | public: |
308 | /// A formulaic type to inform clients of the result type. |
309 | using Result = CallGraph; |
310 | |
311 | /// Compute the \c CallGraph for the module \c M. |
312 | /// |
313 | /// The real work here is done in the \c CallGraph constructor. |
314 | CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } |
315 | }; |
316 | |
317 | /// Printer pass for the \c CallGraphAnalysis results. |
318 | class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { |
319 | raw_ostream &OS; |
320 | |
321 | public: |
322 | explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} |
323 | |
324 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
325 | |
326 | static bool isRequired() { return true; } |
327 | }; |
328 | |
329 | /// Printer pass for the summarized \c CallGraphAnalysis results. |
330 | class CallGraphSCCsPrinterPass |
331 | : public PassInfoMixin<CallGraphSCCsPrinterPass> { |
332 | raw_ostream &OS; |
333 | |
334 | public: |
335 | explicit CallGraphSCCsPrinterPass(raw_ostream &OS) : OS(OS) {} |
336 | |
337 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
338 | |
339 | static bool isRequired() { return true; } |
340 | }; |
341 | |
342 | /// The \c ModulePass which wraps up a \c CallGraph and the logic to |
343 | /// build it. |
344 | /// |
345 | /// This class exposes both the interface to the call graph container and the |
346 | /// module pass which runs over a module of IR and produces the call graph. The |
347 | /// call graph interface is entirelly a wrapper around a \c CallGraph object |
348 | /// which is stored internally for each module. |
349 | class CallGraphWrapperPass : public ModulePass { |
350 | std::unique_ptr<CallGraph> G; |
351 | |
352 | public: |
353 | static char ID; // Class identification, replacement for typeinfo |
354 | |
355 | CallGraphWrapperPass(); |
356 | ~CallGraphWrapperPass() override; |
357 | |
358 | /// The internal \c CallGraph around which the rest of this interface |
359 | /// is wrapped. |
360 | const CallGraph &getCallGraph() const { return *G; } |
361 | CallGraph &getCallGraph() { return *G; } |
362 | |
363 | using iterator = CallGraph::iterator; |
364 | using const_iterator = CallGraph::const_iterator; |
365 | |
366 | /// Returns the module the call graph corresponds to. |
367 | Module &getModule() const { return G->getModule(); } |
368 | |
369 | inline iterator begin() { return G->begin(); } |
370 | inline iterator end() { return G->end(); } |
371 | inline const_iterator begin() const { return G->begin(); } |
372 | inline const_iterator end() const { return G->end(); } |
373 | |
374 | /// Returns the call graph node for the provided function. |
375 | inline const CallGraphNode *operator[](const Function *F) const { |
376 | return (*G)[F]; |
377 | } |
378 | |
379 | /// Returns the call graph node for the provided function. |
380 | inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } |
381 | |
382 | /// Returns the \c CallGraphNode which is used to represent |
383 | /// undetermined calls into the callgraph. |
384 | CallGraphNode *getExternalCallingNode() const { |
385 | return G->getExternalCallingNode(); |
386 | } |
387 | |
388 | CallGraphNode *getCallsExternalNode() const { |
389 | return G->getCallsExternalNode(); |
390 | } |
391 | |
392 | //===--------------------------------------------------------------------- |
393 | // Functions to keep a call graph up to date with a function that has been |
394 | // modified. |
395 | // |
396 | |
397 | /// Unlink the function from this module, returning it. |
398 | /// |
399 | /// Because this removes the function from the module, the call graph node is |
400 | /// destroyed. This is only valid if the function does not call any other |
401 | /// functions (ie, there are no edges in it's CGN). The easiest way to do |
402 | /// this is to dropAllReferences before calling this. |
403 | Function *removeFunctionFromModule(CallGraphNode *CGN) { |
404 | return G->removeFunctionFromModule(CGN); |
405 | } |
406 | |
407 | /// Similar to operator[], but this will insert a new CallGraphNode for |
408 | /// \c F if one does not already exist. |
409 | CallGraphNode *getOrInsertFunction(const Function *F) { |
410 | return G->getOrInsertFunction(F); |
411 | } |
412 | |
413 | //===--------------------------------------------------------------------- |
414 | // Implementation of the ModulePass interface needed here. |
415 | // |
416 | |
417 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
418 | bool runOnModule(Module &M) override; |
419 | void releaseMemory() override; |
420 | |
421 | void print(raw_ostream &o, const Module *) const override; |
422 | void dump() const; |
423 | }; |
424 | |
425 | //===----------------------------------------------------------------------===// |
426 | // GraphTraits specializations for call graphs so that they can be treated as |
427 | // graphs by the generic graph algorithms. |
428 | // |
429 | |
430 | // Provide graph traits for traversing call graphs using standard graph |
431 | // traversals. |
432 | template <> struct GraphTraits<CallGraphNode *> { |
433 | using NodeRef = CallGraphNode *; |
434 | using CGNPairTy = CallGraphNode::CallRecord; |
435 | |
436 | static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } |
437 | static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } |
438 | |
439 | using ChildIteratorType = |
440 | mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; |
441 | |
442 | static ChildIteratorType child_begin(NodeRef N) { |
443 | return ChildIteratorType(N->begin(), &CGNGetValue); |
444 | } |
445 | |
446 | static ChildIteratorType child_end(NodeRef N) { |
447 | return ChildIteratorType(N->end(), &CGNGetValue); |
448 | } |
449 | }; |
450 | |
451 | template <> struct GraphTraits<const CallGraphNode *> { |
452 | using NodeRef = const CallGraphNode *; |
453 | using CGNPairTy = CallGraphNode::CallRecord; |
454 | using EdgeRef = const CallGraphNode::CallRecord &; |
455 | |
456 | static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } |
457 | static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } |
458 | |
459 | using ChildIteratorType = |
460 | mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; |
461 | using ChildEdgeIteratorType = CallGraphNode::const_iterator; |
462 | |
463 | static ChildIteratorType child_begin(NodeRef N) { |
464 | return ChildIteratorType(N->begin(), &CGNGetValue); |
465 | } |
466 | |
467 | static ChildIteratorType child_end(NodeRef N) { |
468 | return ChildIteratorType(N->end(), &CGNGetValue); |
469 | } |
470 | |
471 | static ChildEdgeIteratorType child_edge_begin(NodeRef N) { |
472 | return N->begin(); |
473 | } |
474 | static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } |
475 | |
476 | static NodeRef edge_dest(EdgeRef E) { return E.second; } |
477 | }; |
478 | |
479 | template <> |
480 | struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { |
481 | using PairTy = |
482 | std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; |
483 | |
484 | static NodeRef getEntryNode(CallGraph *CGN) { |
485 | return CGN->getExternalCallingNode(); // Start at the external node! |
486 | } |
487 | |
488 | static CallGraphNode *CGGetValuePtr(const PairTy &P) { |
489 | return P.second.get(); |
490 | } |
491 | |
492 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph |
493 | using nodes_iterator = |
494 | mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; |
495 | |
496 | static nodes_iterator nodes_begin(CallGraph *CG) { |
497 | return nodes_iterator(CG->begin(), &CGGetValuePtr); |
498 | } |
499 | |
500 | static nodes_iterator nodes_end(CallGraph *CG) { |
501 | return nodes_iterator(CG->end(), &CGGetValuePtr); |
502 | } |
503 | }; |
504 | |
505 | template <> |
506 | struct GraphTraits<const CallGraph *> : public GraphTraits< |
507 | const CallGraphNode *> { |
508 | using PairTy = |
509 | std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; |
510 | |
511 | static NodeRef getEntryNode(const CallGraph *CGN) { |
512 | return CGN->getExternalCallingNode(); // Start at the external node! |
513 | } |
514 | |
515 | static const CallGraphNode *CGGetValuePtr(const PairTy &P) { |
516 | return P.second.get(); |
517 | } |
518 | |
519 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph |
520 | using nodes_iterator = |
521 | mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; |
522 | |
523 | static nodes_iterator nodes_begin(const CallGraph *CG) { |
524 | return nodes_iterator(CG->begin(), &CGGetValuePtr); |
525 | } |
526 | |
527 | static nodes_iterator nodes_end(const CallGraph *CG) { |
528 | return nodes_iterator(CG->end(), &CGGetValuePtr); |
529 | } |
530 | }; |
531 | |
532 | } // end namespace llvm |
533 | |
534 | #endif // LLVM_ANALYSIS_CALLGRAPH_H |
535 | |