1//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the template classes ExplodedNode and ExplodedGraph,
10// which represent a path-sensitive, intra-procedural "exploded graph."
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
15#include "clang/AST/Expr.h"
16#include "clang/AST/ExprObjC.h"
17#include "clang/AST/ParentMap.h"
18#include "clang/AST/Stmt.h"
19#include "clang/Analysis/ProgramPoint.h"
20#include "clang/Analysis/Support/BumpVector.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/FoldingSet.h"
27#include "llvm/ADT/PointerUnion.h"
28#include <cassert>
29#include <memory>
30#include <optional>
31
32using namespace clang;
33using namespace ento;
34
35//===----------------------------------------------------------------------===//
36// Cleanup.
37//===----------------------------------------------------------------------===//
38
39ExplodedGraph::ExplodedGraph() = default;
40
41ExplodedGraph::~ExplodedGraph() = default;
42
43//===----------------------------------------------------------------------===//
44// Node reclamation.
45//===----------------------------------------------------------------------===//
46
47bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) {
48 if (!Ex->isLValue())
49 return false;
50 return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Val: Ex);
51}
52
53bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
54 // First, we only consider nodes for reclamation of the following
55 // conditions apply:
56 //
57 // (1) 1 predecessor (that has one successor)
58 // (2) 1 successor (that has one predecessor)
59 //
60 // If a node has no successor it is on the "frontier", while a node
61 // with no predecessor is a root.
62 //
63 // After these prerequisites, we discard all "filler" nodes that
64 // are used only for intermediate processing, and are not essential
65 // for analyzer history:
66 //
67 // (a) PreStmtPurgeDeadSymbols
68 //
69 // We then discard all other nodes where *all* of the following conditions
70 // apply:
71 //
72 // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
73 // (4) There is no 'tag' for the ProgramPoint.
74 // (5) The 'store' is the same as the predecessor.
75 // (6) The 'GDM' is the same as the predecessor.
76 // (7) The LocationContext is the same as the predecessor.
77 // (8) Expressions that are *not* lvalue expressions.
78 // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
79 // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
80 // PreImplicitCall (so that we would be able to find it when retrying a
81 // call with no inlining).
82 // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
83
84 // Conditions 1 and 2.
85 if (node->pred_size() != 1 || node->succ_size() != 1)
86 return false;
87
88 const ExplodedNode *pred = *(node->pred_begin());
89 if (pred->succ_size() != 1)
90 return false;
91
92 const ExplodedNode *succ = *(node->succ_begin());
93 if (succ->pred_size() != 1)
94 return false;
95
96 // Now reclaim any nodes that are (by definition) not essential to
97 // analysis history and are not consulted by any client code.
98 ProgramPoint progPoint = node->getLocation();
99 if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
100 return !progPoint.getTag();
101
102 // Condition 3.
103 if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
104 return false;
105
106 // Condition 4.
107 if (progPoint.getTag())
108 return false;
109
110 // Conditions 5, 6, and 7.
111 ProgramStateRef state = node->getState();
112 ProgramStateRef pred_state = pred->getState();
113 if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
114 progPoint.getLocationContext() != pred->getLocationContext())
115 return false;
116
117 // All further checks require expressions. As per #3, we know that we have
118 // a PostStmt.
119 const Expr *Ex = dyn_cast<Expr>(Val: progPoint.castAs<PostStmt>().getStmt());
120 if (!Ex)
121 return false;
122
123 // Condition 8.
124 // Do not collect nodes for "interesting" lvalue expressions since they are
125 // used extensively for generating path diagnostics.
126 if (isInterestingLValueExpr(Ex))
127 return false;
128
129 // Condition 9.
130 // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
131 // diagnostic generation; specifically, so that we could anchor arrows
132 // pointing to the beginning of statements (as written in code).
133 const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
134 if (!PM.isConsumedExpr(E: Ex))
135 return false;
136
137 // Condition 10.
138 const ProgramPoint SuccLoc = succ->getLocation();
139 if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
140 if (CallEvent::isCallStmt(S: SP->getStmt()))
141 return false;
142
143 // Condition 10, continuation.
144 if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
145 return false;
146
147 return true;
148}
149
150void ExplodedGraph::collectNode(ExplodedNode *node) {
151 // Removing a node means:
152 // (a) changing the predecessors successor to the successor of this node
153 // (b) changing the successors predecessor to the predecessor of this node
154 // (c) Putting 'node' onto freeNodes.
155 assert(node->pred_size() == 1 || node->succ_size() == 1);
156 ExplodedNode *pred = *(node->pred_begin());
157 ExplodedNode *succ = *(node->succ_begin());
158 pred->replaceSuccessor(node: succ);
159 succ->replacePredecessor(node: pred);
160 FreeNodes.push_back(x: node);
161 Nodes.RemoveNode(N: node);
162 --NumNodes;
163 node->~ExplodedNode();
164}
165
166void ExplodedGraph::reclaimRecentlyAllocatedNodes() {
167 if (ChangedNodes.empty())
168 return;
169
170 // Only periodically reclaim nodes so that we can build up a set of
171 // nodes that meet the reclamation criteria. Freshly created nodes
172 // by definition have no successor, and thus cannot be reclaimed (see below).
173 assert(ReclaimCounter > 0);
174 if (--ReclaimCounter != 0)
175 return;
176 ReclaimCounter = ReclaimNodeInterval;
177
178 for (const auto node : ChangedNodes)
179 if (shouldCollect(node))
180 collectNode(node);
181 ChangedNodes.clear();
182}
183
184//===----------------------------------------------------------------------===//
185// ExplodedNode.
186//===----------------------------------------------------------------------===//
187
188// An NodeGroup's storage type is actually very much like a TinyPtrVector:
189// it can be either a pointer to a single ExplodedNode, or a pointer to a
190// BumpVector allocated with the ExplodedGraph's allocator. This allows the
191// common case of single-node NodeGroups to be implemented with no extra memory.
192//
193// Consequently, each of the NodeGroup methods have up to four cases to handle:
194// 1. The flag is set and this group does not actually contain any nodes.
195// 2. The group is empty, in which case the storage value is null.
196// 3. The group contains a single node.
197// 4. The group contains more than one node.
198using ExplodedNodeVector = BumpVector<ExplodedNode *>;
199using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
200
201void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) {
202 assert(!V->isSink());
203 Preds.addNode(N: V, G);
204 V->Succs.addNode(N: this, G);
205}
206
207void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
208 assert(!getFlag());
209
210 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
211 assert(isa<ExplodedNode *>(Storage));
212 Storage = node;
213 assert(isa<ExplodedNode *>(Storage));
214}
215
216void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
217 assert(!getFlag());
218
219 GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
220 if (Storage.isNull()) {
221 Storage = N;
222 assert(isa<ExplodedNode *>(Storage));
223 return;
224 }
225
226 ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Val&: Storage);
227
228 if (!V) {
229 // Switch from single-node to multi-node representation.
230 auto *Old = cast<ExplodedNode *>(Val&: Storage);
231
232 BumpVectorContext &Ctx = G.getNodeAllocator();
233 V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
234 V->push_back(Elt: Old, C&: Ctx);
235
236 Storage = V;
237 assert(!getFlag());
238 assert(isa<ExplodedNodeVector *>(Storage));
239 }
240
241 V->push_back(Elt: N, C&: G.getNodeAllocator());
242}
243
244unsigned ExplodedNode::NodeGroup::size() const {
245 if (getFlag())
246 return 0;
247
248 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
249 if (Storage.isNull())
250 return 0;
251 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Val: Storage))
252 return V->size();
253 return 1;
254}
255
256ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
257 if (getFlag())
258 return nullptr;
259
260 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
261 if (Storage.isNull())
262 return nullptr;
263 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Val: Storage))
264 return V->begin();
265 return Storage.getAddrOfPtr1();
266}
267
268ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
269 if (getFlag())
270 return nullptr;
271
272 const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
273 if (Storage.isNull())
274 return nullptr;
275 if (ExplodedNodeVector *V = dyn_cast<ExplodedNodeVector *>(Val: Storage))
276 return V->end();
277 return Storage.getAddrOfPtr1() + 1;
278}
279
280bool ExplodedNode::isTrivial() const {
281 return pred_size() == 1 && succ_size() == 1 &&
282 getFirstPred()->getState()->getID() == getState()->getID() &&
283 getFirstPred()->succ_size() == 1;
284}
285
286const CFGBlock *ExplodedNode::getCFGBlock() const {
287 ProgramPoint P = getLocation();
288 if (auto BEP = P.getAs<BlockEntrance>())
289 return BEP->getBlock();
290
291 // Find the node's current statement in the CFG.
292 // FIXME: getStmtForDiagnostics() does nasty things in order to provide
293 // a valid statement for body farms, do we need this behavior here?
294 if (const Stmt *S = getStmtForDiagnostics())
295 return getLocationContext()
296 ->getAnalysisDeclContext()
297 ->getCFGStmtMap()
298 ->getBlock(S);
299
300 return nullptr;
301}
302
303static const LocationContext *
304findTopAutosynthesizedParentContext(const LocationContext *LC) {
305 assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized());
306 const LocationContext *ParentLC = LC->getParent();
307 assert(ParentLC && "We don't start analysis from autosynthesized code");
308 while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
309 LC = ParentLC;
310 ParentLC = LC->getParent();
311 assert(ParentLC && "We don't start analysis from autosynthesized code");
312 }
313 return LC;
314}
315
316const Stmt *ExplodedNode::getStmtForDiagnostics() const {
317 // We cannot place diagnostics on autosynthesized code.
318 // Put them onto the call site through which we jumped into autosynthesized
319 // code for the first time.
320 const LocationContext *LC = getLocationContext();
321 if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
322 // It must be a stack frame because we only autosynthesize functions.
323 return cast<StackFrameContext>(Val: findTopAutosynthesizedParentContext(LC))
324 ->getCallSite();
325 }
326 // Otherwise, see if the node's program point directly points to a statement.
327 // FIXME: Refactor into a ProgramPoint method?
328 ProgramPoint P = getLocation();
329 if (auto SP = P.getAs<StmtPoint>())
330 return SP->getStmt();
331 if (auto BE = P.getAs<BlockEdge>())
332 return BE->getSrc()->getTerminatorStmt();
333 if (auto CE = P.getAs<CallEnter>())
334 return CE->getCallExpr();
335 if (auto CEE = P.getAs<CallExitEnd>())
336 return CEE->getCalleeContext()->getCallSite();
337 if (auto PIPP = P.getAs<PostInitializer>())
338 return PIPP->getInitializer()->getInit();
339 if (auto CEB = P.getAs<CallExitBegin>())
340 return CEB->getReturnStmt();
341 if (auto FEP = P.getAs<FunctionExitPoint>())
342 return FEP->getStmt();
343
344 return nullptr;
345}
346
347const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
348 for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
349 if (N->getLocation().isPurgeKind())
350 continue;
351 if (const Stmt *S = N->getStmtForDiagnostics()) {
352 // Check if the statement is '?' or '&&'/'||'. These are "merges",
353 // not actual statement points.
354 switch (S->getStmtClass()) {
355 case Stmt::ChooseExprClass:
356 case Stmt::BinaryConditionalOperatorClass:
357 case Stmt::ConditionalOperatorClass:
358 continue;
359 case Stmt::BinaryOperatorClass: {
360 BinaryOperatorKind Op = cast<BinaryOperator>(Val: S)->getOpcode();
361 if (Op == BO_LAnd || Op == BO_LOr)
362 continue;
363 break;
364 }
365 default:
366 break;
367 }
368 // We found the statement, so return it.
369 return S;
370 }
371 }
372
373 return nullptr;
374}
375
376const Stmt *ExplodedNode::getPreviousStmtForDiagnostics() const {
377 for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
378 if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(Val: S))
379 return S;
380
381 return nullptr;
382}
383
384const Stmt *ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const {
385 if (const Stmt *S = getStmtForDiagnostics())
386 return S;
387
388 return getPreviousStmtForDiagnostics();
389}
390
391ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L,
392 ProgramStateRef State,
393 bool IsSink,
394 bool* IsNew) {
395 // Profile 'State' to determine if we already have an existing node.
396 llvm::FoldingSetNodeID profile;
397 void *InsertPos = nullptr;
398
399 NodeTy::Profile(ID&: profile, Loc: L, state: State, IsSink);
400 NodeTy* V = Nodes.FindNodeOrInsertPos(ID: profile, InsertPos);
401
402 if (!V) {
403 if (!FreeNodes.empty()) {
404 V = FreeNodes.back();
405 FreeNodes.pop_back();
406 }
407 else {
408 // Allocate a new node.
409 V = getAllocator().Allocate<NodeTy>();
410 }
411
412 ++NumNodes;
413 new (V) NodeTy(L, State, NumNodes, IsSink);
414
415 if (ReclaimNodeInterval)
416 ChangedNodes.push_back(x: V);
417
418 // Insert the node into the node set and return it.
419 Nodes.InsertNode(N: V, InsertPos);
420
421 if (IsNew) *IsNew = true;
422 }
423 else
424 if (IsNew) *IsNew = false;
425
426 return V;
427}
428
429ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L,
430 ProgramStateRef State,
431 int64_t Id,
432 bool IsSink) {
433 NodeTy *V = getAllocator().Allocate<NodeTy>();
434 new (V) NodeTy(L, State, Id, IsSink);
435 return V;
436}
437
438std::unique_ptr<ExplodedGraph>
439ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
440 InterExplodedGraphMap *ForwardMap,
441 InterExplodedGraphMap *InverseMap) const {
442 // FIXME: The two-pass algorithm of this function (which was introduced in
443 // 2008) is terribly overcomplicated and should be replaced by a single
444 // (backward) pass.
445
446 if (Nodes.empty())
447 return nullptr;
448
449 using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
450 Pass1Ty Pass1;
451
452 using Pass2Ty = InterExplodedGraphMap;
453 InterExplodedGraphMap Pass2Scratch;
454 Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
455
456 SmallVector<const ExplodedNode*, 10> WL1, WL2;
457
458 // ===- Pass 1 (reverse DFS) -===
459 for (const auto Sink : Sinks)
460 if (Sink)
461 WL1.push_back(Elt: Sink);
462
463 // Process the first worklist until it is empty.
464 while (!WL1.empty()) {
465 const ExplodedNode *N = WL1.pop_back_val();
466
467 // Have we already visited this node? If so, continue to the next one.
468 if (!Pass1.insert(V: N).second)
469 continue;
470
471 // If this is the root enqueue it to the second worklist.
472 if (N->Preds.empty()) {
473 assert(N == getRoot() && "Found non-root node with no predecessors!");
474 WL2.push_back(Elt: N);
475 continue;
476 }
477
478 // Visit our predecessors and enqueue them.
479 WL1.append(in_start: N->Preds.begin(), in_end: N->Preds.end());
480 }
481
482 // We didn't hit the root? Return with a null pointer for the new graph.
483 if (WL2.empty())
484 return nullptr;
485
486 assert(WL2.size() == 1 && "There must be only one root!");
487
488 // Create an empty graph.
489 std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
490
491 // ===- Pass 2 (forward DFS to construct the new graph) -===
492 while (!WL2.empty()) {
493 const ExplodedNode *N = WL2.pop_back_val();
494
495 auto [Place, Inserted] = Pass2.try_emplace(Key: N);
496
497 // Skip this node if we have already processed it.
498 if (!Inserted)
499 continue;
500
501 // Create the corresponding node in the new graph and record the mapping
502 // from the old node to the new node.
503 ExplodedNode *NewN = G->createUncachedNode(L: N->getLocation(), State: N->State,
504 Id: N->getID(), IsSink: N->isSink());
505 Place->second = NewN;
506
507 // Also record the reverse mapping from the new node to the old node.
508 if (InverseMap) (*InverseMap)[NewN] = N;
509
510 // If this node is the root, designate it as such in the graph.
511 if (N->Preds.empty()) {
512 assert(N == getRoot());
513 G->designateAsRoot(V: NewN);
514 }
515
516 // In the case that some of the intended predecessors of NewN have already
517 // been created, we should hook them up as predecessors.
518
519 // Walk through the predecessors of 'N' and hook up their corresponding
520 // nodes in the new graph (if any) to the freshly created node.
521 for (const ExplodedNode *Pred : N->Preds) {
522 Pass2Ty::iterator PI = Pass2.find(Val: Pred);
523 if (PI == Pass2.end())
524 continue;
525
526 NewN->addPredecessor(V: const_cast<ExplodedNode *>(PI->second), G&: *G);
527 }
528
529 // In the case that some of the intended successors of NewN have already
530 // been created, we should hook them up as successors. Otherwise, enqueue
531 // the new nodes from the original graph that should have nodes created
532 // in the new graph.
533 for (const ExplodedNode *Succ : N->Succs) {
534 Pass2Ty::iterator PI = Pass2.find(Val: Succ);
535 if (PI != Pass2.end()) {
536 const_cast<ExplodedNode *>(PI->second)->addPredecessor(V: NewN, G&: *G);
537 continue;
538 }
539
540 // Enqueue nodes to the worklist that were marked during pass 1.
541 if (Pass1.count(V: Succ))
542 WL2.push_back(Elt: Succ);
543 }
544 }
545
546 return G;
547}
548