1 | //===- ExplodedGraph.h - Local, Path-Sens. "Exploded Graph" -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the template classes ExplodedNode and ExplodedGraph, |
10 | // which represent a path-sensitive, intra-procedural "exploded graph." |
11 | // See "Precise interprocedural dataflow analysis via graph reachability" |
12 | // by Reps, Horwitz, and Sagiv |
13 | // (http://portal.acm.org/citation.cfm?id=199462) for the definition of an |
14 | // exploded graph. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H |
19 | #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H |
20 | |
21 | #include "clang/Analysis/AnalysisDeclContext.h" |
22 | #include "clang/Analysis/ProgramPoint.h" |
23 | #include "clang/Analysis/Support/BumpVector.h" |
24 | #include "clang/Basic/LLVM.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" |
26 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" |
27 | #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" |
28 | #include "llvm/ADT/ArrayRef.h" |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/DepthFirstIterator.h" |
31 | #include "llvm/ADT/FoldingSet.h" |
32 | #include "llvm/ADT/GraphTraits.h" |
33 | #include "llvm/ADT/STLExtras.h" |
34 | #include "llvm/ADT/SetVector.h" |
35 | #include "llvm/ADT/iterator_range.h" |
36 | #include "llvm/Support/Allocator.h" |
37 | #include "llvm/Support/Compiler.h" |
38 | #include <cassert> |
39 | #include <cstdint> |
40 | #include <memory> |
41 | #include <optional> |
42 | #include <utility> |
43 | #include <vector> |
44 | |
45 | namespace clang { |
46 | |
47 | class CFG; |
48 | class Decl; |
49 | class Expr; |
50 | class ParentMap; |
51 | class Stmt; |
52 | |
53 | namespace ento { |
54 | |
55 | class ExplodedGraph; |
56 | |
57 | //===----------------------------------------------------------------------===// |
58 | // ExplodedGraph "implementation" classes. These classes are not typed to |
59 | // contain a specific kind of state. Typed-specialized versions are defined |
60 | // on top of these classes. |
61 | //===----------------------------------------------------------------------===// |
62 | |
63 | // ExplodedNode is not constified all over the engine because we need to add |
64 | // successors to it at any time after creating it. |
65 | |
66 | class ExplodedNode : public llvm::FoldingSetNode { |
67 | friend class BranchNodeBuilder; |
68 | friend class CoreEngine; |
69 | friend class EndOfFunctionNodeBuilder; |
70 | friend class ExplodedGraph; |
71 | friend class IndirectGotoNodeBuilder; |
72 | friend class NodeBuilder; |
73 | friend class SwitchNodeBuilder; |
74 | |
75 | /// Efficiently stores a list of ExplodedNodes, or an optional flag. |
76 | /// |
77 | /// NodeGroup provides opaque storage for a list of ExplodedNodes, optimizing |
78 | /// for the case when there is only one node in the group. This is a fairly |
79 | /// common case in an ExplodedGraph, where most nodes have only one |
80 | /// predecessor and many have only one successor. It can also be used to |
81 | /// store a flag rather than a node list, which ExplodedNode uses to mark |
82 | /// whether a node is a sink. If the flag is set, the group is implicitly |
83 | /// empty and no nodes may be added. |
84 | class NodeGroup { |
85 | // Conceptually a discriminated union. If the low bit is set, the node is |
86 | // a sink. If the low bit is not set, the pointer refers to the storage |
87 | // for the nodes in the group. |
88 | // This is not a PointerIntPair in order to keep the storage type opaque. |
89 | uintptr_t P; |
90 | |
91 | public: |
92 | NodeGroup(bool Flag = false) : P(Flag) { |
93 | assert(getFlag() == Flag); |
94 | } |
95 | |
96 | ExplodedNode * const *begin() const; |
97 | |
98 | ExplodedNode * const *end() const; |
99 | |
100 | unsigned size() const; |
101 | |
102 | bool empty() const { return P == 0 || getFlag() != 0; } |
103 | |
104 | /// Adds a node to the list. |
105 | /// |
106 | /// The group must not have been created with its flag set. |
107 | void addNode(ExplodedNode *N, ExplodedGraph &G); |
108 | |
109 | /// Replaces the single node in this group with a new node. |
110 | /// |
111 | /// Note that this should only be used when you know the group was not |
112 | /// created with its flag set, and that the group is empty or contains |
113 | /// only a single node. |
114 | void replaceNode(ExplodedNode *node); |
115 | |
116 | /// Returns whether this group was created with its flag set. |
117 | bool getFlag() const { |
118 | return (P & 1); |
119 | } |
120 | }; |
121 | |
122 | /// Location - The program location (within a function body) associated |
123 | /// with this node. |
124 | const ProgramPoint Location; |
125 | |
126 | /// State - The state associated with this node. |
127 | ProgramStateRef State; |
128 | |
129 | /// Preds - The predecessors of this node. |
130 | NodeGroup Preds; |
131 | |
132 | /// Succs - The successors of this node. |
133 | NodeGroup Succs; |
134 | |
135 | int64_t Id; |
136 | |
137 | public: |
138 | explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state, |
139 | int64_t Id, bool IsSink) |
140 | : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) { |
141 | assert(isSink() == IsSink); |
142 | } |
143 | |
144 | /// getLocation - Returns the edge associated with the given node. |
145 | ProgramPoint getLocation() const { return Location; } |
146 | |
147 | const LocationContext *getLocationContext() const { |
148 | return getLocation().getLocationContext(); |
149 | } |
150 | |
151 | const StackFrameContext *getStackFrame() const { |
152 | return getLocation().getStackFrame(); |
153 | } |
154 | |
155 | const Decl &getCodeDecl() const { return *getLocationContext()->getDecl(); } |
156 | |
157 | CFG &getCFG() const { return *getLocationContext()->getCFG(); } |
158 | |
159 | const CFGBlock *getCFGBlock() const; |
160 | |
161 | const ParentMap &getParentMap() const { |
162 | return getLocationContext()->getParentMap(); |
163 | } |
164 | |
165 | template <typename T> T &getAnalysis() const { |
166 | return *getLocationContext()->getAnalysis<T>(); |
167 | } |
168 | |
169 | const ProgramStateRef &getState() const { return State; } |
170 | |
171 | template <typename T> std::optional<T> getLocationAs() const & { |
172 | return Location.getAs<T>(); |
173 | } |
174 | |
175 | /// Get the value of an arbitrary expression at this node. |
176 | SVal getSVal(const Stmt *S) const { |
177 | return getState()->getSVal(Ex: S, LCtx: getLocationContext()); |
178 | } |
179 | |
180 | static void Profile(llvm::FoldingSetNodeID &ID, |
181 | const ProgramPoint &Loc, |
182 | const ProgramStateRef &state, |
183 | bool IsSink) { |
184 | ID.Add(x: Loc); |
185 | ID.AddPointer(Ptr: state.get()); |
186 | ID.AddBoolean(B: IsSink); |
187 | } |
188 | |
189 | void Profile(llvm::FoldingSetNodeID& ID) const { |
190 | // We avoid copy constructors by not using accessors. |
191 | Profile(ID, Loc: Location, state: State, IsSink: isSink()); |
192 | } |
193 | |
194 | /// addPredeccessor - Adds a predecessor to the current node, and |
195 | /// in tandem add this node as a successor of the other node. |
196 | void addPredecessor(ExplodedNode *V, ExplodedGraph &G); |
197 | |
198 | unsigned succ_size() const { return Succs.size(); } |
199 | unsigned pred_size() const { return Preds.size(); } |
200 | bool succ_empty() const { return Succs.empty(); } |
201 | bool pred_empty() const { return Preds.empty(); } |
202 | |
203 | bool isSink() const { return Succs.getFlag(); } |
204 | |
205 | bool hasSinglePred() const { |
206 | return (pred_size() == 1); |
207 | } |
208 | |
209 | ExplodedNode *getFirstPred() { |
210 | return pred_empty() ? nullptr : *(pred_begin()); |
211 | } |
212 | |
213 | const ExplodedNode *getFirstPred() const { |
214 | return const_cast<ExplodedNode*>(this)->getFirstPred(); |
215 | } |
216 | |
217 | ExplodedNode *getFirstSucc() { |
218 | return succ_empty() ? nullptr : *(succ_begin()); |
219 | } |
220 | |
221 | const ExplodedNode *getFirstSucc() const { |
222 | return const_cast<ExplodedNode*>(this)->getFirstSucc(); |
223 | } |
224 | |
225 | // Iterators over successor and predecessor vertices. |
226 | using succ_iterator = ExplodedNode * const *; |
227 | using succ_range = llvm::iterator_range<succ_iterator>; |
228 | |
229 | using const_succ_iterator = const ExplodedNode * const *; |
230 | using const_succ_range = llvm::iterator_range<const_succ_iterator>; |
231 | |
232 | using pred_iterator = ExplodedNode * const *; |
233 | using pred_range = llvm::iterator_range<pred_iterator>; |
234 | |
235 | using const_pred_iterator = const ExplodedNode * const *; |
236 | using const_pred_range = llvm::iterator_range<const_pred_iterator>; |
237 | |
238 | pred_iterator pred_begin() { return Preds.begin(); } |
239 | pred_iterator pred_end() { return Preds.end(); } |
240 | pred_range preds() { return {Preds.begin(), Preds.end()}; } |
241 | |
242 | const_pred_iterator pred_begin() const { |
243 | return const_cast<ExplodedNode*>(this)->pred_begin(); |
244 | } |
245 | const_pred_iterator pred_end() const { |
246 | return const_cast<ExplodedNode*>(this)->pred_end(); |
247 | } |
248 | const_pred_range preds() const { return {Preds.begin(), Preds.end()}; } |
249 | |
250 | succ_iterator succ_begin() { return Succs.begin(); } |
251 | succ_iterator succ_end() { return Succs.end(); } |
252 | succ_range succs() { return {Succs.begin(), Succs.end()}; } |
253 | |
254 | const_succ_iterator succ_begin() const { |
255 | return const_cast<ExplodedNode*>(this)->succ_begin(); |
256 | } |
257 | const_succ_iterator succ_end() const { |
258 | return const_cast<ExplodedNode*>(this)->succ_end(); |
259 | } |
260 | const_succ_range succs() const { return {Succs.begin(), Succs.end()}; } |
261 | |
262 | int64_t getID() const { return Id; } |
263 | |
264 | /// The node is trivial if it has only one successor, only one predecessor, |
265 | /// it's predecessor has only one successor, |
266 | /// and its program state is the same as the program state of the previous |
267 | /// node. |
268 | /// Trivial nodes may be skipped while printing exploded graph. |
269 | bool isTrivial() const; |
270 | |
271 | /// If the node's program point corresponds to a statement, retrieve that |
272 | /// statement. Useful for figuring out where to put a warning or a note. |
273 | /// If the statement belongs to a body-farmed definition, |
274 | /// retrieve the call site for that definition. |
275 | const Stmt *getStmtForDiagnostics() const; |
276 | |
277 | /// Find the next statement that was executed on this node's execution path. |
278 | /// Useful for explaining control flow that follows the current node. |
279 | /// If the statement belongs to a body-farmed definition, retrieve the |
280 | /// call site for that definition. |
281 | const Stmt *getNextStmtForDiagnostics() const; |
282 | |
283 | /// Find the statement that was executed immediately before this node. |
284 | /// Useful when the node corresponds to a CFG block entrance. |
285 | /// If the statement belongs to a body-farmed definition, retrieve the |
286 | /// call site for that definition. |
287 | const Stmt *getPreviousStmtForDiagnostics() const; |
288 | |
289 | /// Find the statement that was executed at or immediately before this node. |
290 | /// Useful when any nearby statement will do. |
291 | /// If the statement belongs to a body-farmed definition, retrieve the |
292 | /// call site for that definition. |
293 | const Stmt *getCurrentOrPreviousStmtForDiagnostics() const; |
294 | |
295 | private: |
296 | void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); } |
297 | void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); } |
298 | }; |
299 | |
300 | using InterExplodedGraphMap = |
301 | llvm::DenseMap<const ExplodedNode *, const ExplodedNode *>; |
302 | |
303 | class ExplodedGraph { |
304 | protected: |
305 | friend class CoreEngine; |
306 | |
307 | // Type definitions. |
308 | using NodeVector = std::vector<ExplodedNode *>; |
309 | |
310 | /// The roots of the simulation graph. Usually there will be only |
311 | /// one, but clients are free to establish multiple subgraphs within a single |
312 | /// SimulGraph. Moreover, these subgraphs can often merge when paths from |
313 | /// different roots reach the same state at the same program location. |
314 | NodeVector Roots; |
315 | |
316 | /// The nodes in the simulation graph which have been |
317 | /// specially marked as the endpoint of an abstract simulation path. |
318 | NodeVector EndNodes; |
319 | |
320 | /// Nodes - The nodes in the graph. |
321 | llvm::FoldingSet<ExplodedNode> Nodes; |
322 | |
323 | /// BVC - Allocator and context for allocating nodes and their predecessor |
324 | /// and successor groups. |
325 | BumpVectorContext BVC; |
326 | |
327 | /// NumNodes - The number of nodes in the graph. |
328 | int64_t NumNodes = 0; |
329 | |
330 | /// A list of recently allocated nodes that can potentially be recycled. |
331 | NodeVector ChangedNodes; |
332 | |
333 | /// A list of nodes that can be reused. |
334 | NodeVector FreeNodes; |
335 | |
336 | /// Determines how often nodes are reclaimed. |
337 | /// |
338 | /// If this is 0, nodes will never be reclaimed. |
339 | unsigned ReclaimNodeInterval = 0; |
340 | |
341 | /// Counter to determine when to reclaim nodes. |
342 | unsigned ReclaimCounter; |
343 | |
344 | public: |
345 | ExplodedGraph(); |
346 | ~ExplodedGraph(); |
347 | |
348 | /// Retrieve the node associated with a (Location,State) pair, |
349 | /// where the 'Location' is a ProgramPoint in the CFG. If no node for |
350 | /// this pair exists, it is created. IsNew is set to true if |
351 | /// the node was freshly created. |
352 | ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State, |
353 | bool IsSink = false, |
354 | bool* IsNew = nullptr); |
355 | |
356 | /// Create a node for a (Location, State) pair, |
357 | /// but don't store it for deduplication later. This |
358 | /// is useful when copying an already completed |
359 | /// ExplodedGraph for further processing. |
360 | ExplodedNode *createUncachedNode(const ProgramPoint &L, |
361 | ProgramStateRef State, |
362 | int64_t Id, |
363 | bool IsSink = false); |
364 | |
365 | std::unique_ptr<ExplodedGraph> MakeEmptyGraph() const { |
366 | return std::make_unique<ExplodedGraph>(); |
367 | } |
368 | |
369 | /// addRoot - Add an untyped node to the set of roots. |
370 | ExplodedNode *addRoot(ExplodedNode *V) { |
371 | Roots.push_back(x: V); |
372 | return V; |
373 | } |
374 | |
375 | /// addEndOfPath - Add an untyped node to the set of EOP nodes. |
376 | ExplodedNode *addEndOfPath(ExplodedNode *V) { |
377 | EndNodes.push_back(x: V); |
378 | return V; |
379 | } |
380 | |
381 | unsigned num_roots() const { return Roots.size(); } |
382 | unsigned num_eops() const { return EndNodes.size(); } |
383 | |
384 | bool empty() const { return NumNodes == 0; } |
385 | unsigned size() const { return NumNodes; } |
386 | |
387 | void reserve(unsigned NodeCount) { Nodes.reserve(EltCount: NodeCount); } |
388 | |
389 | // Iterators. |
390 | using NodeTy = ExplodedNode; |
391 | using AllNodesTy = llvm::FoldingSet<ExplodedNode>; |
392 | using roots_iterator = NodeVector::iterator; |
393 | using const_roots_iterator = NodeVector::const_iterator; |
394 | using eop_iterator = NodeVector::iterator; |
395 | using const_eop_iterator = NodeVector::const_iterator; |
396 | using node_iterator = AllNodesTy::iterator; |
397 | using const_node_iterator = AllNodesTy::const_iterator; |
398 | |
399 | llvm::iterator_range<node_iterator> nodes() { return Nodes; } |
400 | |
401 | llvm::iterator_range<const_node_iterator> nodes() const { return Nodes; } |
402 | |
403 | roots_iterator roots_begin() { return Roots.begin(); } |
404 | |
405 | roots_iterator roots_end() { return Roots.end(); } |
406 | |
407 | const_roots_iterator roots_begin() const { return Roots.begin(); } |
408 | |
409 | const_roots_iterator roots_end() const { return Roots.end(); } |
410 | |
411 | eop_iterator eop_begin() { return EndNodes.begin(); } |
412 | |
413 | eop_iterator eop_end() { return EndNodes.end(); } |
414 | |
415 | const_eop_iterator eop_begin() const { return EndNodes.begin(); } |
416 | |
417 | const_eop_iterator eop_end() const { return EndNodes.end(); } |
418 | |
419 | llvm::BumpPtrAllocator & getAllocator() { return BVC.getAllocator(); } |
420 | BumpVectorContext &getNodeAllocator() { return BVC; } |
421 | |
422 | using NodeMap = llvm::DenseMap<const ExplodedNode *, ExplodedNode *>; |
423 | |
424 | /// Creates a trimmed version of the graph that only contains paths leading |
425 | /// to the given nodes. |
426 | /// |
427 | /// \param Nodes The nodes which must appear in the final graph. Presumably |
428 | /// these are end-of-path nodes (i.e. they have no successors). |
429 | /// \param[out] ForwardMap A optional map from nodes in this graph to nodes in |
430 | /// the returned graph. |
431 | /// \param[out] InverseMap An optional map from nodes in the returned graph to |
432 | /// nodes in this graph. |
433 | /// \returns The trimmed graph |
434 | std::unique_ptr<ExplodedGraph> |
435 | trim(ArrayRef<const NodeTy *> Nodes, |
436 | InterExplodedGraphMap *ForwardMap = nullptr, |
437 | InterExplodedGraphMap *InverseMap = nullptr) const; |
438 | |
439 | /// Enable tracking of recently allocated nodes for potential reclamation |
440 | /// when calling reclaimRecentlyAllocatedNodes(). |
441 | void enableNodeReclamation(unsigned Interval) { |
442 | ReclaimCounter = ReclaimNodeInterval = Interval; |
443 | } |
444 | |
445 | /// Reclaim "uninteresting" nodes created since the last time this method |
446 | /// was called. |
447 | void reclaimRecentlyAllocatedNodes(); |
448 | |
449 | /// Returns true if nodes for the given expression kind are always |
450 | /// kept around. |
451 | static bool isInterestingLValueExpr(const Expr *Ex); |
452 | |
453 | private: |
454 | bool shouldCollect(const ExplodedNode *node); |
455 | void collectNode(ExplodedNode *node); |
456 | }; |
457 | |
458 | class ExplodedNodeSet { |
459 | using ImplTy = llvm::SmallSetVector<ExplodedNode *, 4>; |
460 | ImplTy Impl; |
461 | |
462 | public: |
463 | ExplodedNodeSet(ExplodedNode *N) { |
464 | assert(N && !static_cast<ExplodedNode*>(N)->isSink()); |
465 | Impl.insert(X: N); |
466 | } |
467 | |
468 | ExplodedNodeSet() = default; |
469 | |
470 | void Add(ExplodedNode *N) { |
471 | if (N && !static_cast<ExplodedNode*>(N)->isSink()) Impl.insert(X: N); |
472 | } |
473 | |
474 | using iterator = ImplTy::iterator; |
475 | using const_iterator = ImplTy::const_iterator; |
476 | |
477 | unsigned size() const { return Impl.size(); } |
478 | bool empty() const { return Impl.empty(); } |
479 | bool erase(ExplodedNode *N) { return Impl.remove(X: N); } |
480 | |
481 | void clear() { Impl.clear(); } |
482 | |
483 | void insert(const ExplodedNodeSet &S) { |
484 | assert(&S != this); |
485 | if (empty()) |
486 | Impl = S.Impl; |
487 | else |
488 | Impl.insert(Start: S.begin(), End: S.end()); |
489 | } |
490 | |
491 | iterator begin() { return Impl.begin(); } |
492 | iterator end() { return Impl.end(); } |
493 | |
494 | const_iterator begin() const { return Impl.begin(); } |
495 | const_iterator end() const { return Impl.end(); } |
496 | }; |
497 | |
498 | } // namespace ento |
499 | |
500 | } // namespace clang |
501 | |
502 | // GraphTraits |
503 | |
504 | namespace llvm { |
505 | template <> struct GraphTraits<clang::ento::ExplodedGraph *> { |
506 | using GraphTy = clang::ento::ExplodedGraph *; |
507 | using NodeRef = clang::ento::ExplodedNode *; |
508 | using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator; |
509 | using nodes_iterator = llvm::df_iterator<GraphTy>; |
510 | |
511 | static NodeRef getEntryNode(const GraphTy G) { |
512 | return *G->roots_begin(); |
513 | } |
514 | |
515 | static bool predecessorOfTrivial(NodeRef N) { |
516 | return N->succ_size() == 1 && N->getFirstSucc()->isTrivial(); |
517 | } |
518 | |
519 | static ChildIteratorType child_begin(NodeRef N) { |
520 | if (predecessorOfTrivial(N)) |
521 | return child_begin(N: *N->succ_begin()); |
522 | return N->succ_begin(); |
523 | } |
524 | |
525 | static ChildIteratorType child_end(NodeRef N) { |
526 | if (predecessorOfTrivial(N)) |
527 | return child_end(N: N->getFirstSucc()); |
528 | return N->succ_end(); |
529 | } |
530 | |
531 | static nodes_iterator nodes_begin(const GraphTy G) { |
532 | return df_begin(G); |
533 | } |
534 | |
535 | static nodes_iterator nodes_end(const GraphTy G) { |
536 | return df_end(G); |
537 | } |
538 | }; |
539 | } // namespace llvm |
540 | |
541 | #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_EXPLODEDGRAPH_H |
542 | |