ExplodedGraph.cpp source code [llvm_projects/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp]

1	//===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the template classes ExplodedNode and ExplodedGraph,
10	// which represent a path-sensitive, intra-procedural "exploded graph."
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
15	#include "clang/AST/Expr.h"
16	#include "clang/AST/ExprObjC.h"
17	#include "clang/AST/ParentMap.h"
18	#include "clang/AST/Stmt.h"
19	#include "clang/Analysis/ProgramPoint.h"
20	#include "clang/Analysis/Support/BumpVector.h"
21	#include "clang/Basic/LLVM.h"
22	#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23	#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
24	#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
25	#include "llvm/ADT/DenseSet.h"
26	#include "llvm/ADT/FoldingSet.h"
27	#include "llvm/ADT/PointerUnion.h"
28	#include <cassert>
29	#include <memory>
30	#include <optional>
31
32	using namespace clang;
33	using namespace ento;
34
35	//===----------------------------------------------------------------------===//
36	// Cleanup.
37	//===----------------------------------------------------------------------===//
38
39	ExplodedGraph::ExplodedGraph() = default;
40
41	ExplodedGraph::~ExplodedGraph() = default;
42
43	//===----------------------------------------------------------------------===//
44	// Node reclamation.
45	//===----------------------------------------------------------------------===//
46
47	bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) {
48	if (!Ex->isLValue())
49	return false;
50	return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Val: Ex);
51	}
52
53	bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
54	// First, we only consider nodes for reclamation of the following
55	// conditions apply:
56	//
57	// (1) 1 predecessor (that has one successor)
58	// (2) 1 successor (that has one predecessor)
59	//
60	// If a node has no successor it is on the "frontier", while a node
61	// with no predecessor is a root.
62	//
63	// After these prerequisites, we discard all "filler" nodes that
64	// are used only for intermediate processing, and are not essential
65	// for analyzer history:
66	//
67	// (a) PreStmtPurgeDeadSymbols
68	//
69	// We then discard all other nodes where all* of the following conditions*
70	// apply:
71	//
72	// (3) The ProgramPoint is for a PostStmt, but not a PostStore.
73	// (4) There is no 'tag' for the ProgramPoint.
74	// (5) The 'store' is the same as the predecessor.
75	// (6) The 'GDM' is the same as the predecessor.
76	// (7) The LocationContext is the same as the predecessor.
77	// (8) Expressions that are not* lvalue expressions.*
78	// (9) The PostStmt isn't for a non-consumed Stmt or Expr.
79	// (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
80	// PreImplicitCall (so that we would be able to find it when retrying a
81	// call with no inlining).
82	// FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
83
84	// Conditions 1 and 2.
85	if (node->pred_size() != `1` \|\| node->succ_size() != `1`)
86	return false;
87
88	const ExplodedNode pred = (node->pred_begin());
89	if (pred->succ_size() != `1`)
90	return false;
91
92	const ExplodedNode succ = (node->succ_begin());
93	if (succ->pred_size() != `1`)
94	return false;
95
96	// Now reclaim any nodes that are (by definition) not essential to
97	// analysis history and are not consulted by any client code.
98	ProgramPoint progPoint = node->getLocation();
99	if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
100	return !progPoint.getTag();
101
102	// Condition 3.
103	if (!progPoint.getAs<PostStmt>() \|\| progPoint.getAs<PostStore>())
104	return false;
105
106	// Condition 4.
107	if (progPoint.getTag())
108	return false;
109
110	// Conditions 5, 6, and 7.
111	ProgramStateRef state = node->getState();
112	ProgramStateRef pred_state = pred->getState();
113	if (state ->store != pred_state ->store \|\| state ->GDM != pred_state ->GDM \|\|
114	progPoint.getLocationContext() != pred->getLocationContext())
115	return false;
116
117	// All further checks require expressions. As per #3, we know that we have
118	// a PostStmt.
119	const Expr *Ex = dyn_cast<Expr>(Val: progPoint.castAs<PostStmt>().getStmt());
120	if (!Ex)
121	return false;
122
123	// Condition 8.
124	// Do not collect nodes for "interesting" lvalue expressions since they are
125	// used extensively for generating path diagnostics.
126	if (isInterestingLValueExpr(Ex))
127	return false;
128
129	// Condition 9.
130	// Do not collect nodes for non-consumed Stmt or Expr to ensure precise
131	// diagnostic generation; specifically, so that we could anchor arrows
132	// pointing to the beginning of statements (as written in code).
133	const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
134	if (!PM.isConsumedExpr(E: Ex))
135	return false;
136
137	// Condition 10.
138	const ProgramPoint SuccLoc = succ->getLocation();
139	if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
140	if (CallEvent::isCallStmt(S: SP ->getStmt()))
141	return false;
142
143	// Condition 10, continuation.
144	if (SuccLoc.getAs<CallEnter>() \|\| SuccLoc.getAs<PreImplicitCall>())
145	return false;
146
147	return true;
148	}
149
150	void ExplodedGraph::collectNode(ExplodedNode *node) {
151	// Removing a node means:
152	// (a) changing the predecessors successor to the successor of this node
153	// (b) changing the successors predecessor to the predecessor of this node
154	// (c) Putting 'node' onto freeNodes.
155	assert(node->pred_size() == `1` \|\| node->succ_size() == `1`);
156	ExplodedNode pred = (node->pred_begin());
157	ExplodedNode succ = (node->succ_begin());
158	pred->replaceSuccessor(node: succ);
159	succ->replacePredecessor(node: pred);
160	FreeNodes.push_back(x: node);
161	Nodes.RemoveNode(N: node);
162	--NumNodes;
163	node->~ExplodedNode();
164	}
165
166	void ExplodedGraph::reclaimRecentlyAllocatedNodes() {
167	if (ChangedNodes.empty())
168	return;
169
170	// Only periodically reclaim nodes so that we can build up a set of
171	// nodes that meet the reclamation criteria. Freshly created nodes
172	// by definition have no successor, and thus cannot be reclaimed (see below).
173	assert(ReclaimCounter > `0`);
174	if (--ReclaimCounter != `0`)
175	return;
176	ReclaimCounter = ReclaimNodeInterval;
177
178	for (const auto node : ChangedNodes)
179	if (shouldCollect(node))
180	collectNode(node);
181	ChangedNodes.clear();
182	}
183
184	//===----------------------------------------------------------------------===//
185	// ExplodedNode.
186	//===----------------------------------------------------------------------===//
187
188	// An NodeGroup's storage type is actually very much like a TinyPtrVector:
189	// it can be either a pointer to a single ExplodedNode, or a pointer to a
190	// BumpVector allocated with the ExplodedGraph's allocator. This allows the
191	// common case of single-node NodeGroups to be implemented with no extra memory.
192	//
193	// Consequently, each of the NodeGroup methods have up to four cases to handle:
194	// 1. The flag is set and this group does not actually contain any nodes.
195	// 2. The group is empty, in which case the storage value is null.
196	// 3. The group contains a single node.
197	// 4. The group contains more than one node.
198	using ExplodedNodeVector = BumpVector<ExplodedNode *>;
199	using GroupStorage = llvm::PointerUnion<ExplodedNode , ExplodedNodeVector >;
200
201	void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) {
202	assert(!V->isSink());
203	Preds.addNode(N: V, G);
204	V->Succs.addNode(N: this, G);
205	}
206
207	void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
208	assert(!getFlag());
209
210	GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
211	assert(isa<ExplodedNode *>(Storage));
212	Storage = node;
213	assert(isa<ExplodedNode *>(Storage));
214	}
215
216	void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
217	assert(!getFlag());
218
219	GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
220	if (Storage.isNull()) {
221	Storage = N;
222	assert(isa<ExplodedNode *>(Storage));
223	return;
224	}
225
226	ExplodedNodeVector V = dyn_cast<ExplodedNodeVector >(Val&: Storage);
227
228	if (!V) {
229	// Switch from single-node to multi-node representation.
230	auto Old = cast<ExplodedNode >(Val&: Storage);
231
232	BumpVectorContext &Ctx = G.getNodeAllocator();
233	V = new (G.getAllocator()) ExplodedNodeVector (Ctx, `4`);
234	V->push_back(Elt: Old, C&: Ctx);
235
236	Storage = V;
237	assert(!getFlag());
238	assert(isa<ExplodedNodeVector *>(Storage));
239	}
240
241	V->push_back(Elt: N, C&: G.getNodeAllocator());
242	}
243
244	unsigned ExplodedNode::NodeGroup::size() const {
245	if (getFlag())
246	return `0`;
247
248	const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
249	if (Storage.isNull())
250	return `0`;
251	if (ExplodedNodeVector V = dyn_cast<ExplodedNodeVector >(Val: Storage))
252	return V->size();
253	return `1`;
254	}
255
256	ExplodedNode * const ExplodedNode::NodeGroup::begin() const* {
257	if (getFlag())
258	return nullptr;
259
260	const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
261	if (Storage.isNull())
262	return nullptr;
263	if (ExplodedNodeVector V = dyn_cast<ExplodedNodeVector >(Val: Storage))
264	return V->begin();
265	return Storage.getAddrOfPtr1();
266	}
267
268	ExplodedNode * const ExplodedNode::NodeGroup::end() const* {
269	if (getFlag())
270	return nullptr;
271
272	const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
273	if (Storage.isNull())
274	return nullptr;
275	if (ExplodedNodeVector V = dyn_cast<ExplodedNodeVector >(Val: Storage))
276	return V->end();
277	return Storage.getAddrOfPtr1() + `1`;
278	}
279
280	bool ExplodedNode::isTrivial() const {
281	return pred_size() == `1` && succ_size() == `1` &&
282	getFirstPred()->getState()->getID() == getState()->getID() &&
283	getFirstPred()->succ_size() == `1`;
284	}
285
286	const CFGBlock ExplodedNode::getCFGBlock() const* {
287	ProgramPoint P = getLocation();
288	if (auto BEP = P.getAs<BlockEntrance>())
289	return BEP ->getBlock();
290
291	// Find the node's current statement in the CFG.
292	// FIXME: getStmtForDiagnostics() does nasty things in order to provide
293	// a valid statement for body farms, do we need this behavior here?
294	if (const Stmt *S = getStmtForDiagnostics())
295	return getLocationContext()
296	->getAnalysisDeclContext()
297	->getCFGStmtMap()
298	->getBlock(S);
299
300	return nullptr;
301	}
302
303	static const LocationContext *
304	findTopAutosynthesizedParentContext(const LocationContext *LC) {
305	assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized());
306	const LocationContext *ParentLC = LC->getParent();
307	assert(ParentLC && "We don't start analysis from autosynthesized code");
308	while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
309	LC = ParentLC;
310	ParentLC = LC->getParent();
311	assert(ParentLC && "We don't start analysis from autosynthesized code");
312	}
313	return LC;
314	}
315
316	const Stmt ExplodedNode::getStmtForDiagnostics() const* {
317	// We cannot place diagnostics on autosynthesized code.
318	// Put them onto the call site through which we jumped into autosynthesized
319	// code for the first time.
320	const LocationContext *LC = getLocationContext();
321	if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
322	// It must be a stack frame because we only autosynthesize functions.
323	return cast<StackFrameContext>(Val: findTopAutosynthesizedParentContext(LC))
324	->getCallSite();
325	}
326	// Otherwise, see if the node's program point directly points to a statement.
327	// FIXME: Refactor into a ProgramPoint method?
328	ProgramPoint P = getLocation();
329	if (auto SP = P.getAs<StmtPoint>())
330	return SP ->getStmt();
331	if (auto BE = P.getAs<BlockEdge>())
332	return BE ->getSrc()->getTerminatorStmt();
333	if (auto CE = P.getAs<CallEnter>())
334	return CE ->getCallExpr();
335	if (auto CEE = P.getAs<CallExitEnd>())
336	return CEE ->getCalleeContext()->getCallSite();
337	if (auto PIPP = P.getAs<PostInitializer>())
338	return PIPP ->getInitializer()->getInit();
339	if (auto CEB = P.getAs<CallExitBegin>())
340	return CEB ->getReturnStmt();
341	if (auto FEP = P.getAs<FunctionExitPoint>())
342	return FEP ->getStmt();
343
344	return nullptr;
345	}
346
347	const Stmt ExplodedNode::getNextStmtForDiagnostics() const* {
348	for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
349	if (N->getLocation().isPurgeKind())
350	continue;
351	if (const Stmt *S = N->getStmtForDiagnostics()) {
352	// Check if the statement is '?' or '&&'/'\|\|'. These are "merges",
353	// not actual statement points.
354	switch (S->getStmtClass()) {
355	case Stmt::ChooseExprClass:
356	case Stmt::BinaryConditionalOperatorClass:
357	case Stmt::ConditionalOperatorClass:
358	continue;
359	case Stmt::BinaryOperatorClass: {
360	BinaryOperatorKind Op = cast<BinaryOperator>(Val: S)->getOpcode();
361	if (Op == BO_LAnd \|\| Op == BO_LOr)
362	continue;
363	break;
364	}
365	default:
366	break;
367	}
368	// We found the statement, so return it.
369	return S;
370	}
371	}
372
373	return nullptr;
374	}
375
376	const Stmt ExplodedNode::getPreviousStmtForDiagnostics() const* {
377	for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
378	if (const Stmt *S = N->getStmtForDiagnostics(); S && !isa<CompoundStmt>(Val: S))
379	return S;
380
381	return nullptr;
382	}
383
384	const Stmt ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const* {
385	if (const Stmt *S = getStmtForDiagnostics())
386	return S;
387
388	return getPreviousStmtForDiagnostics();
389	}
390
391	ExplodedNode ExplodedGraph::getNode(const* ProgramPoint &L,
392	ProgramStateRef State,
393	bool IsSink,
394	bool* IsNew) {
395	// Profile 'State' to determine if we already have an existing node.
396	llvm::FoldingSetNodeID profile;
397	void InsertPos = nullptr*;
398
399	NodeTy::Profile(ID&: profile, Loc: L, state: State, IsSink);
400	NodeTy* V = Nodes.FindNodeOrInsertPos(ID: profile, InsertPos);
401
402	if (!V) {
403	if (!FreeNodes.empty()) {
404	V = FreeNodes.back();
405	FreeNodes.pop_back();
406	}
407	else {
408	// Allocate a new node.
409	V = getAllocator().Allocate<NodeTy>();
410	}
411
412	++NumNodes;
413	new (V) NodeTy (L, State, NumNodes, IsSink);
414
415	if (ReclaimNodeInterval)
416	ChangedNodes.push_back(x: V);
417
418	// Insert the node into the node set and return it.
419	Nodes.InsertNode(N: V, InsertPos);
420
421	if (IsNew) IsNew = true*;
422	}
423	else
424	if (IsNew) IsNew = false*;
425
426	return V;
427	}
428
429	ExplodedNode ExplodedGraph::createUncachedNode(const* ProgramPoint &L,
430	ProgramStateRef State,
431	int64_t Id,
432	bool IsSink) {
433	NodeTy *V = getAllocator().Allocate<NodeTy>();
434	new (V) NodeTy (L, State, Id, IsSink);
435	return V;
436	}
437
438	std::unique_ptr<ExplodedGraph>
439	ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
440	InterExplodedGraphMap *ForwardMap,
441	InterExplodedGraphMap InverseMap) const* {
442	// FIXME: The two-pass algorithm of this function (which was introduced in
443	// 2008) is terribly overcomplicated and should be replaced by a single
444	// (backward) pass.
445
446	if (Nodes.empty())
447	return nullptr;
448
449	using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
450	Pass1Ty Pass1;
451
452	using Pass2Ty = InterExplodedGraphMap;
453	InterExplodedGraphMap Pass2Scratch;
454	Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
455
456	SmallVector<const ExplodedNode*, `10`> WL1, WL2;
457
458	// ===- Pass 1 (reverse DFS) -===
459	for (const auto Sink : Sinks)
460	if (Sink)
461	WL1.push_back(Elt: Sink);
462
463	// Process the first worklist until it is empty.
464	while (!WL1.empty()) {
465	const ExplodedNode *N = WL1.pop_back_val();
466
467	// Have we already visited this node? If so, continue to the next one.
468	if (!Pass1.insert(V: N).second)
469	continue;
470
471	// If this is the root enqueue it to the second worklist.
472	if (N->Preds.empty()) {
473	assert(N == getRoot() && "Found non-root node with no predecessors!");
474	WL2.push_back(Elt: N);
475	continue;
476	}
477
478	// Visit our predecessors and enqueue them.
479	WL1.append(in_start: N->Preds.begin(), in_end: N->Preds.end());
480	}
481
482	// We didn't hit the root? Return with a null pointer for the new graph.
483	if (WL2.empty())
484	return nullptr;
485
486	assert(WL2.size() == `1` && "There must be only one root!");
487
488	// Create an empty graph.
489	std::unique_ptr<ExplodedGraph> G = std::make_unique<ExplodedGraph>();
490
491	// ===- Pass 2 (forward DFS to construct the new graph) -===
492	while (!WL2.empty()) {
493	const ExplodedNode *N = WL2.pop_back_val();
494
495	auto [Place, Inserted] = Pass2.try_emplace(Key: N);
496
497	// Skip this node if we have already processed it.
498	if (!Inserted)
499	continue;
500
501	// Create the corresponding node in the new graph and record the mapping
502	// from the old node to the new node.
503	ExplodedNode *NewN = G ->createUncachedNode(L: N->getLocation(), State: N->State,
504	Id: N->getID(), IsSink: N->isSink());
505	Place ->second = NewN;
506
507	// Also record the reverse mapping from the new node to the old node.
508	if (InverseMap) (*InverseMap)[NewN] = N;
509
510	// If this node is the root, designate it as such in the graph.
511	if (N->Preds.empty()) {
512	assert(N == getRoot());
513	G ->designateAsRoot(V: NewN);
514	}
515
516	// In the case that some of the intended predecessors of NewN have already
517	// been created, we should hook them up as predecessors.
518
519	// Walk through the predecessors of 'N' and hook up their corresponding
520	// nodes in the new graph (if any) to the freshly created node.
521	for (const ExplodedNode *Pred : N->Preds) {
522	Pass2Ty::iterator PI = Pass2.find(Val: Pred);
523	if (PI == Pass2.end())
524	continue;
525
526	NewN->addPredecessor(V: const_cast<ExplodedNode >(PI ->second), G&: G);
527	}
528
529	// In the case that some of the intended successors of NewN have already
530	// been created, we should hook them up as successors. Otherwise, enqueue
531	// the new nodes from the original graph that should have nodes created
532	// in the new graph.
533	for (const ExplodedNode *Succ : N->Succs) {
534	Pass2Ty::iterator PI = Pass2.find(Val: Succ);
535	if (PI != Pass2.end()) {
536	const_cast<ExplodedNode >(PI ->second)->addPredecessor(V: NewN, G&: G);
537	continue;
538	}
539
540	// Enqueue nodes to the worklist that were marked during pass 1.
541	if (Pass1.count(V: Succ))
542	WL2.push_back(Elt: Succ);
543	}
544	}
545
546	return G;
547	}
548

Browse the source code of llvm_projects/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp