1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16#include "clang/AST/Type.h"
17#include "clang/Analysis/FlowSensitive/ASTOps.h"
18#include "clang/Analysis/FlowSensitive/Formula.h"
19#include "clang/Analysis/FlowSensitive/Logger.h"
20#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
21#include "clang/Analysis/FlowSensitive/Value.h"
22#include "clang/Basic/LLVM.h"
23#include "llvm/ADT/DenseSet.h"
24#include "llvm/ADT/SetOperations.h"
25#include "llvm/ADT/SetVector.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/FileSystem.h"
29#include "llvm/Support/Path.h"
30#include "llvm/Support/raw_ostream.h"
31#include <cassert>
32#include <memory>
33#include <stack>
34#include <string>
35#include <utility>
36#include <vector>
37
38static llvm::cl::opt<std::string> DataflowLog(
39 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
40 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
41 "log to stderr. With an arg, writes HTML logs under the "
42 "specified directory (one per analyzed function)."));
43
44namespace clang {
45namespace dataflow {
46
47FieldSet DataflowAnalysisContext::computeModeledFields(QualType Type) {
48 // During context-sensitive analysis, a struct may be allocated in one
49 // function, but its field accessed in a function lower in the stack than
50 // the allocation. Since we only collect fields used in the function where
51 // the allocation occurs, we can't apply that filter when performing
52 // context-sensitive analysis. But, this only applies to storage locations,
53 // since field access it not allowed to fail. In contrast, field *values*
54 // don't need this allowance, since the API allows for uninitialized fields.
55 if (Opts.ContextSensitiveOpts)
56 return getObjectFields(Type);
57
58 return llvm::set_intersection(S1: getObjectFields(Type), S2: ModeledFields);
59}
60
61const FieldSet &DataflowAnalysisContext::getModeledFields(QualType Type) {
62 QualType CanonicalType = Type.getCanonicalType().getUnqualifiedType();
63 std::unique_ptr<FieldSet> &Fields = CachedModeledFields[CanonicalType];
64 if (Fields == nullptr)
65 Fields = std::make_unique<FieldSet>(args: computeModeledFields(Type: CanonicalType));
66 return *Fields;
67}
68
69void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
70 ModeledFields.set_union(Fields);
71 CachedModeledFields.clear();
72}
73
74StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
75 if (!Type.isNull() && Type->isRecordType()) {
76 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
77 for (const FieldDecl *Field : getModeledFields(Type))
78 if (Field->getType()->isReferenceType())
79 FieldLocs.insert(KV: {Field, nullptr});
80 else
81 FieldLocs.insert(KV: {Field, &createStorageLocation(
82 Type: Field->getType().getNonReferenceType())});
83
84 RecordStorageLocation::SyntheticFieldMap SyntheticFields;
85 for (const auto &Entry : getSyntheticFields(Type))
86 SyntheticFields.insert(
87 KV: {Entry.getKey(),
88 &createStorageLocation(Type: Entry.getValue().getNonReferenceType())});
89
90 return createRecordStorageLocation(Type, FieldLocs: std::move(FieldLocs),
91 SyntheticFields: std::move(SyntheticFields));
92 }
93 return arena().create<ScalarStorageLocation>(args&: Type);
94}
95
96// Returns the keys for a given `StringMap`.
97// Can't use `StringSet` as the return type as it doesn't support `operator==`.
98template <typename T>
99[[maybe_unused]] static llvm::DenseSet<llvm::StringRef>
100getKeys(const llvm::StringMap<T> &Map) {
101 return llvm::DenseSet<llvm::StringRef>(llvm::from_range, Map.keys());
102}
103
104RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
105 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
106 RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
107 assert(Type->isRecordType());
108 assert(containsSameFields(getModeledFields(Type), FieldLocs));
109 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
110
111 RecordStorageLocationCreated = true;
112 return arena().create<RecordStorageLocation>(args&: Type, args: std::move(FieldLocs),
113 args: std::move(SyntheticFields));
114}
115
116StorageLocation &
117DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
118 if (auto *Loc = DeclToLoc.lookup(Val: &D))
119 return *Loc;
120 auto &Loc = createStorageLocation(Type: D.getType().getNonReferenceType());
121 DeclToLoc[&D] = &Loc;
122 return Loc;
123}
124
125StorageLocation &
126DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
127 const Expr &CanonE = ignoreCFGOmittedNodes(E);
128
129 if (auto *Loc = ExprToLoc.lookup(Val: &CanonE))
130 return *Loc;
131 auto &Loc = createStorageLocation(Type: CanonE.getType());
132 ExprToLoc[&CanonE] = &Loc;
133 return Loc;
134}
135
136PointerValue &
137DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
138 auto CanonicalPointeeType =
139 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
140 auto Res = NullPointerVals.try_emplace(Key: CanonicalPointeeType, Args: nullptr);
141 if (Res.second) {
142 auto &PointeeLoc = createStorageLocation(Type: CanonicalPointeeType);
143 Res.first->second = &arena().create<PointerValue>(args&: PointeeLoc);
144 }
145 return *Res.first->second;
146}
147
148void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
149 if (Invariant == nullptr)
150 Invariant = &Constraint;
151 else
152 Invariant = &arena().makeAnd(LHS: *Invariant, RHS: Constraint);
153}
154
155void DataflowAnalysisContext::addFlowConditionConstraint(
156 Atom Token, const Formula &Constraint) {
157 auto Res = FlowConditionConstraints.try_emplace(Key: Token, Args: &Constraint);
158 if (!Res.second) {
159 Res.first->second =
160 &arena().makeAnd(LHS: *Res.first->second, RHS: Constraint);
161 }
162}
163
164Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
165 Atom ForkToken = arena().makeFlowConditionToken();
166 FlowConditionDeps[ForkToken].insert(V: Token);
167 addFlowConditionConstraint(Token: ForkToken, Constraint: arena().makeAtomRef(A: Token));
168 return ForkToken;
169}
170
171Atom
172DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
173 Atom SecondToken) {
174 Atom Token = arena().makeFlowConditionToken();
175 auto &TokenDeps = FlowConditionDeps[Token];
176 TokenDeps.insert(V: FirstToken);
177 TokenDeps.insert(V: SecondToken);
178 addFlowConditionConstraint(Token,
179 Constraint: arena().makeOr(LHS: arena().makeAtomRef(A: FirstToken),
180 RHS: arena().makeAtomRef(A: SecondToken)));
181 return Token;
182}
183
184Solver::Result DataflowAnalysisContext::querySolver(
185 llvm::SetVector<const Formula *> Constraints) {
186 return S.solve(Vals: Constraints.getArrayRef());
187}
188
189bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
190 const Formula &F) {
191 if (F.isLiteral(b: true))
192 return true;
193
194 // Returns true if and only if truth assignment of the flow condition implies
195 // that `F` is also true. We prove whether or not this property holds by
196 // reducing the problem to satisfiability checking. In other words, we attempt
197 // to show that assuming `F` is false makes the constraints induced by the
198 // flow condition unsatisfiable.
199 llvm::SetVector<const Formula *> Constraints;
200 Constraints.insert(X: &arena().makeAtomRef(A: Token));
201 Constraints.insert(X: &arena().makeNot(Val: F));
202 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
203 return isUnsatisfiable(Constraints: std::move(Constraints));
204}
205
206bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
207 const Formula &F) {
208 if (F.isLiteral(b: false))
209 return false;
210
211 llvm::SetVector<const Formula *> Constraints;
212 Constraints.insert(X: &arena().makeAtomRef(A: Token));
213 Constraints.insert(X: &F);
214 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
215 return isSatisfiable(Constraints: std::move(Constraints));
216}
217
218bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
219 const Formula &Val2) {
220 llvm::SetVector<const Formula *> Constraints;
221 Constraints.insert(X: &arena().makeNot(Val: arena().makeEquals(LHS: Val1, RHS: Val2)));
222 return isUnsatisfiable(Constraints: std::move(Constraints));
223}
224
225llvm::DenseSet<Atom> DataflowAnalysisContext::collectDependencies(
226 llvm::DenseSet<Atom> Tokens) const {
227 // Use a worklist algorithm, with `Remaining` holding the worklist and
228 // `Tokens` tracking which atoms have already been added to the worklist.
229 std::vector<Atom> Remaining(Tokens.begin(), Tokens.end());
230 while (!Remaining.empty()) {
231 Atom CurrentToken = Remaining.back();
232 Remaining.pop_back();
233 if (auto DepsIt = FlowConditionDeps.find(Val: CurrentToken);
234 DepsIt != FlowConditionDeps.end())
235 for (Atom A : DepsIt->second)
236 if (Tokens.insert(V: A).second)
237 Remaining.push_back(x: A);
238 }
239
240 return Tokens;
241}
242
243void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
244 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
245 llvm::DenseSet<Atom> AddedTokens;
246 std::vector<Atom> Remaining = {Token};
247
248 if (Invariant)
249 Constraints.insert(X: Invariant);
250 // Define all the flow conditions that might be referenced in constraints.
251 while (!Remaining.empty()) {
252 auto Token = Remaining.back();
253 Remaining.pop_back();
254 if (!AddedTokens.insert(V: Token).second)
255 continue;
256
257 auto ConstraintsIt = FlowConditionConstraints.find(Val: Token);
258 if (ConstraintsIt == FlowConditionConstraints.end()) {
259 // The flow condition is unconstrained. Just add the atom directly, which
260 // is equivalent to asserting it is true.
261 Constraints.insert(X: &arena().makeAtomRef(A: Token));
262 } else {
263 // Bind flow condition token via `iff` to its set of constraints:
264 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
265 Constraints.insert(X: &arena().makeEquals(LHS: arena().makeAtomRef(A: Token),
266 RHS: *ConstraintsIt->second));
267 }
268
269 if (auto DepsIt = FlowConditionDeps.find(Val: Token);
270 DepsIt != FlowConditionDeps.end())
271 for (Atom A : DepsIt->second)
272 Remaining.push_back(x: A);
273 }
274}
275
276static void getReferencedAtoms(const Formula &F,
277 llvm::DenseSet<dataflow::Atom> &Refs) {
278 // Avoid recursion to avoid stack overflows from very large formulas.
279 // The shape of the tree structure for very large formulas is such that there
280 // are at most 2 children from any node, but there may be many generations.
281 std::stack<const Formula *> WorkList;
282 WorkList.push(x: &F);
283
284 while (!WorkList.empty()) {
285 const Formula *Current = WorkList.top();
286 WorkList.pop();
287 switch (Current->kind()) {
288 case Formula::AtomRef:
289 Refs.insert(V: Current->getAtom());
290 break;
291 case Formula::Literal:
292 break;
293 case Formula::Not:
294 WorkList.push(x: Current->operands()[0]);
295 break;
296 case Formula::And:
297 case Formula::Or:
298 case Formula::Implies:
299 case Formula::Equal:
300 ArrayRef<const Formula *> Operands = Current->operands();
301 WorkList.push(x: Operands[0]);
302 WorkList.push(x: Operands[1]);
303 break;
304 }
305 }
306}
307
308SimpleLogicalContext DataflowAnalysisContext::exportLogicalContext(
309 llvm::DenseSet<dataflow::Atom> TargetTokens) const {
310 SimpleLogicalContext LC;
311
312 // Copy `Invariant` even if it is null, to initialize the field.
313 LC.Invariant = Invariant;
314 if (Invariant != nullptr)
315 getReferencedAtoms(F: *Invariant, Refs&: TargetTokens);
316
317 llvm::DenseSet<dataflow::Atom> Dependencies =
318 collectDependencies(Tokens: std::move(TargetTokens));
319
320 for (dataflow::Atom Token : Dependencies) {
321 // Only process the token if it is constrained. Unconstrained tokens don't
322 // have dependencies.
323 const Formula *Constraints = FlowConditionConstraints.lookup(Val: Token);
324 if (Constraints == nullptr)
325 continue;
326 LC.TokenDefs[Token] = Constraints;
327
328 if (auto DepsIt = FlowConditionDeps.find(Val: Token);
329 DepsIt != FlowConditionDeps.end())
330 LC.TokenDeps[Token] = DepsIt->second;
331 }
332
333 return LC;
334}
335
336void DataflowAnalysisContext::initLogicalContext(SimpleLogicalContext LC) {
337 Invariant = LC.Invariant;
338 FlowConditionConstraints = std::move(LC.TokenDefs);
339 // TODO: The dependencies in `LC.TokenDeps` can be reconstructed from
340 // `LC.TokenDefs`. Give the caller the option to reconstruct, rather than
341 // providing them directly, to save caller space (memory/disk).
342 FlowConditionDeps = std::move(LC.TokenDeps);
343}
344
345static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
346 llvm::raw_ostream &OS) {
347 OS << "(";
348 for (size_t i = 0; i < Atoms.size(); ++i) {
349 OS << Atoms[i];
350 if (i + 1 < Atoms.size())
351 OS << ", ";
352 }
353 OS << ")\n";
354}
355
356void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
357 llvm::raw_ostream &OS) {
358 llvm::SetVector<const Formula *> Constraints;
359 Constraints.insert(X: &arena().makeAtomRef(A: Token));
360 addTransitiveFlowConditionConstraints(Token, Constraints);
361
362 OS << "Flow condition token: " << Token << "\n";
363 SimplifyConstraintsInfo Info;
364 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
365 simplifyConstraints(Constraints, arena&: arena(), Info: &Info);
366 if (!Constraints.empty()) {
367 OS << "Constraints:\n";
368 for (const auto *Constraint : Constraints) {
369 Constraint->print(OS);
370 OS << "\n";
371 }
372 }
373 if (!Info.TrueAtoms.empty()) {
374 OS << "True atoms: ";
375 printAtomList(Atoms: Info.TrueAtoms, OS);
376 }
377 if (!Info.FalseAtoms.empty()) {
378 OS << "False atoms: ";
379 printAtomList(Atoms: Info.FalseAtoms, OS);
380 }
381 if (!Info.EquivalentAtoms.empty()) {
382 OS << "Equivalent atoms:\n";
383 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
384 printAtomList(Atoms: Class, OS);
385 }
386
387 OS << "\nFlow condition constraints before simplification:\n";
388 for (const auto *Constraint : OriginalConstraints) {
389 Constraint->print(OS);
390 OS << "\n";
391 }
392}
393
394const AdornedCFG *
395DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
396 // Canonicalize the key:
397 F = F->getDefinition();
398 if (F == nullptr)
399 return nullptr;
400 auto It = FunctionContexts.find(Val: F);
401 if (It != FunctionContexts.end())
402 return &It->second;
403
404 if (F->doesThisDeclarationHaveABody()) {
405 auto ACFG = AdornedCFG::build(Func: *F);
406 // FIXME: Handle errors.
407 assert(ACFG);
408 auto Result = FunctionContexts.insert(KV: {F, std::move(*ACFG)});
409 return &Result.first->second;
410 }
411
412 return nullptr;
413}
414
415static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
416 if (DataflowLog.empty())
417 return Logger::textual(llvm::errs());
418
419 llvm::StringRef Dir = DataflowLog;
420 if (auto EC = llvm::sys::fs::create_directories(path: Dir))
421 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
422 // All analysis runs within a process will log to the same directory.
423 // Share a counter so they don't all overwrite each other's 0.html.
424 // (Don't share a logger, it's not threadsafe).
425 static std::atomic<unsigned> Counter = {0};
426 auto StreamFactory =
427 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
428 llvm::SmallString<256> File(Dir);
429 llvm::sys::path::append(path&: File,
430 a: std::to_string(val: Counter.fetch_add(i: 1)) + ".html");
431 std::error_code EC;
432 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: File, args&: EC);
433 if (EC) {
434 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
435 << "\n";
436 return std::make_unique<llvm::raw_null_ostream>();
437 }
438 return OS;
439 };
440 return Logger::html(std::move(StreamFactory));
441}
442
443DataflowAnalysisContext::DataflowAnalysisContext(
444 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
445 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
446 Opts(Opts) {
447 // If the -dataflow-log command-line flag was set, synthesize a logger.
448 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
449 // based tools.
450 if (Opts.Log == nullptr) {
451 if (DataflowLog.getNumOccurrences() > 0) {
452 LogOwner = makeLoggerFromCommandLine();
453 this->Opts.Log = LogOwner.get();
454 // FIXME: if the flag is given a value, write an HTML log to a file.
455 } else {
456 this->Opts.Log = &Logger::null();
457 }
458 }
459}
460
461DataflowAnalysisContext::~DataflowAnalysisContext() = default;
462
463} // namespace dataflow
464} // namespace clang
465