1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16#include "clang/Analysis/FlowSensitive/ASTOps.h"
17#include "clang/Analysis/FlowSensitive/Formula.h"
18#include "clang/Analysis/FlowSensitive/Logger.h"
19#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
20#include "clang/Analysis/FlowSensitive/Value.h"
21#include "clang/Basic/LLVM.h"
22#include "llvm/ADT/DenseSet.h"
23#include "llvm/ADT/SetOperations.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/raw_ostream.h"
30#include <cassert>
31#include <memory>
32#include <stack>
33#include <string>
34#include <utility>
35#include <vector>
36
37static llvm::cl::opt<std::string> DataflowLog(
38 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
39 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
40 "log to stderr. With an arg, writes HTML logs under the "
41 "specified directory (one per analyzed function)."));
42
43namespace clang {
44namespace dataflow {
45
46FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
47 // During context-sensitive analysis, a struct may be allocated in one
48 // function, but its field accessed in a function lower in the stack than
49 // the allocation. Since we only collect fields used in the function where
50 // the allocation occurs, we can't apply that filter when performing
51 // context-sensitive analysis. But, this only applies to storage locations,
52 // since field access it not allowed to fail. In contrast, field *values*
53 // don't need this allowance, since the API allows for uninitialized fields.
54 if (Opts.ContextSensitiveOpts)
55 return getObjectFields(Type);
56
57 return llvm::set_intersection(S1: getObjectFields(Type), S2: ModeledFields);
58}
59
60void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
61 ModeledFields.set_union(Fields);
62}
63
64StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
65 if (!Type.isNull() && Type->isRecordType()) {
66 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
67 for (const FieldDecl *Field : getModeledFields(Type))
68 if (Field->getType()->isReferenceType())
69 FieldLocs.insert(KV: {Field, nullptr});
70 else
71 FieldLocs.insert(KV: {Field, &createStorageLocation(
72 Type: Field->getType().getNonReferenceType())});
73
74 RecordStorageLocation::SyntheticFieldMap SyntheticFields;
75 for (const auto &Entry : getSyntheticFields(Type))
76 SyntheticFields.insert(
77 KV: {Entry.getKey(),
78 &createStorageLocation(Type: Entry.getValue().getNonReferenceType())});
79
80 return createRecordStorageLocation(Type, FieldLocs: std::move(FieldLocs),
81 SyntheticFields: std::move(SyntheticFields));
82 }
83 return arena().create<ScalarStorageLocation>(args&: Type);
84}
85
86// Returns the keys for a given `StringMap`.
87// Can't use `StringSet` as the return type as it doesn't support `operator==`.
88template <typename T>
89static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
90 return llvm::DenseSet<llvm::StringRef>(llvm::from_range, Map.keys());
91}
92
93RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
94 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
95 RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
96 assert(Type->isRecordType());
97 assert(containsSameFields(getModeledFields(Type), FieldLocs));
98 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
99
100 RecordStorageLocationCreated = true;
101 return arena().create<RecordStorageLocation>(args&: Type, args: std::move(FieldLocs),
102 args: std::move(SyntheticFields));
103}
104
105StorageLocation &
106DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
107 if (auto *Loc = DeclToLoc.lookup(Val: &D))
108 return *Loc;
109 auto &Loc = createStorageLocation(Type: D.getType().getNonReferenceType());
110 DeclToLoc[&D] = &Loc;
111 return Loc;
112}
113
114StorageLocation &
115DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
116 const Expr &CanonE = ignoreCFGOmittedNodes(E);
117
118 if (auto *Loc = ExprToLoc.lookup(Val: &CanonE))
119 return *Loc;
120 auto &Loc = createStorageLocation(Type: CanonE.getType());
121 ExprToLoc[&CanonE] = &Loc;
122 return Loc;
123}
124
125PointerValue &
126DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
127 auto CanonicalPointeeType =
128 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
129 auto Res = NullPointerVals.try_emplace(Key: CanonicalPointeeType, Args: nullptr);
130 if (Res.second) {
131 auto &PointeeLoc = createStorageLocation(Type: CanonicalPointeeType);
132 Res.first->second = &arena().create<PointerValue>(args&: PointeeLoc);
133 }
134 return *Res.first->second;
135}
136
137void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
138 if (Invariant == nullptr)
139 Invariant = &Constraint;
140 else
141 Invariant = &arena().makeAnd(LHS: *Invariant, RHS: Constraint);
142}
143
144void DataflowAnalysisContext::addFlowConditionConstraint(
145 Atom Token, const Formula &Constraint) {
146 auto Res = FlowConditionConstraints.try_emplace(Key: Token, Args: &Constraint);
147 if (!Res.second) {
148 Res.first->second =
149 &arena().makeAnd(LHS: *Res.first->second, RHS: Constraint);
150 }
151}
152
153Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
154 Atom ForkToken = arena().makeFlowConditionToken();
155 FlowConditionDeps[ForkToken].insert(V: Token);
156 addFlowConditionConstraint(Token: ForkToken, Constraint: arena().makeAtomRef(A: Token));
157 return ForkToken;
158}
159
160Atom
161DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
162 Atom SecondToken) {
163 Atom Token = arena().makeFlowConditionToken();
164 auto &TokenDeps = FlowConditionDeps[Token];
165 TokenDeps.insert(V: FirstToken);
166 TokenDeps.insert(V: SecondToken);
167 addFlowConditionConstraint(Token,
168 Constraint: arena().makeOr(LHS: arena().makeAtomRef(A: FirstToken),
169 RHS: arena().makeAtomRef(A: SecondToken)));
170 return Token;
171}
172
173Solver::Result DataflowAnalysisContext::querySolver(
174 llvm::SetVector<const Formula *> Constraints) {
175 return S.solve(Vals: Constraints.getArrayRef());
176}
177
178bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
179 const Formula &F) {
180 if (F.isLiteral(b: true))
181 return true;
182
183 // Returns true if and only if truth assignment of the flow condition implies
184 // that `F` is also true. We prove whether or not this property holds by
185 // reducing the problem to satisfiability checking. In other words, we attempt
186 // to show that assuming `F` is false makes the constraints induced by the
187 // flow condition unsatisfiable.
188 llvm::SetVector<const Formula *> Constraints;
189 Constraints.insert(X: &arena().makeAtomRef(A: Token));
190 Constraints.insert(X: &arena().makeNot(Val: F));
191 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
192 return isUnsatisfiable(Constraints: std::move(Constraints));
193}
194
195bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
196 const Formula &F) {
197 if (F.isLiteral(b: false))
198 return false;
199
200 llvm::SetVector<const Formula *> Constraints;
201 Constraints.insert(X: &arena().makeAtomRef(A: Token));
202 Constraints.insert(X: &F);
203 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
204 return isSatisfiable(Constraints: std::move(Constraints));
205}
206
207bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
208 const Formula &Val2) {
209 llvm::SetVector<const Formula *> Constraints;
210 Constraints.insert(X: &arena().makeNot(Val: arena().makeEquals(LHS: Val1, RHS: Val2)));
211 return isUnsatisfiable(Constraints: std::move(Constraints));
212}
213
214llvm::DenseSet<Atom> DataflowAnalysisContext::collectDependencies(
215 llvm::DenseSet<Atom> Tokens) const {
216 // Use a worklist algorithm, with `Remaining` holding the worklist and
217 // `Tokens` tracking which atoms have already been added to the worklist.
218 std::vector<Atom> Remaining(Tokens.begin(), Tokens.end());
219 while (!Remaining.empty()) {
220 Atom CurrentToken = Remaining.back();
221 Remaining.pop_back();
222 if (auto DepsIt = FlowConditionDeps.find(Val: CurrentToken);
223 DepsIt != FlowConditionDeps.end())
224 for (Atom A : DepsIt->second)
225 if (Tokens.insert(V: A).second)
226 Remaining.push_back(x: A);
227 }
228
229 return Tokens;
230}
231
232void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
233 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
234 llvm::DenseSet<Atom> AddedTokens;
235 std::vector<Atom> Remaining = {Token};
236
237 if (Invariant)
238 Constraints.insert(X: Invariant);
239 // Define all the flow conditions that might be referenced in constraints.
240 while (!Remaining.empty()) {
241 auto Token = Remaining.back();
242 Remaining.pop_back();
243 if (!AddedTokens.insert(V: Token).second)
244 continue;
245
246 auto ConstraintsIt = FlowConditionConstraints.find(Val: Token);
247 if (ConstraintsIt == FlowConditionConstraints.end()) {
248 // The flow condition is unconstrained. Just add the atom directly, which
249 // is equivalent to asserting it is true.
250 Constraints.insert(X: &arena().makeAtomRef(A: Token));
251 } else {
252 // Bind flow condition token via `iff` to its set of constraints:
253 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
254 Constraints.insert(X: &arena().makeEquals(LHS: arena().makeAtomRef(A: Token),
255 RHS: *ConstraintsIt->second));
256 }
257
258 if (auto DepsIt = FlowConditionDeps.find(Val: Token);
259 DepsIt != FlowConditionDeps.end())
260 for (Atom A : DepsIt->second)
261 Remaining.push_back(x: A);
262 }
263}
264
265static void getReferencedAtoms(const Formula &F,
266 llvm::DenseSet<dataflow::Atom> &Refs) {
267 // Avoid recursion to avoid stack overflows from very large formulas.
268 // The shape of the tree structure for very large formulas is such that there
269 // are at most 2 children from any node, but there may be many generations.
270 std::stack<const Formula *> WorkList;
271 WorkList.push(x: &F);
272
273 while (!WorkList.empty()) {
274 const Formula *Current = WorkList.top();
275 WorkList.pop();
276 switch (Current->kind()) {
277 case Formula::AtomRef:
278 Refs.insert(V: Current->getAtom());
279 break;
280 case Formula::Literal:
281 break;
282 case Formula::Not:
283 WorkList.push(x: Current->operands()[0]);
284 break;
285 case Formula::And:
286 case Formula::Or:
287 case Formula::Implies:
288 case Formula::Equal:
289 ArrayRef<const Formula *> Operands = Current->operands();
290 WorkList.push(x: Operands[0]);
291 WorkList.push(x: Operands[1]);
292 break;
293 }
294 }
295}
296
297SimpleLogicalContext DataflowAnalysisContext::exportLogicalContext(
298 llvm::DenseSet<dataflow::Atom> TargetTokens) const {
299 SimpleLogicalContext LC;
300
301 // Copy `Invariant` even if it is null, to initialize the field.
302 LC.Invariant = Invariant;
303 if (Invariant != nullptr)
304 getReferencedAtoms(F: *Invariant, Refs&: TargetTokens);
305
306 llvm::DenseSet<dataflow::Atom> Dependencies =
307 collectDependencies(Tokens: std::move(TargetTokens));
308
309 for (dataflow::Atom Token : Dependencies) {
310 // Only process the token if it is constrained. Unconstrained tokens don't
311 // have dependencies.
312 const Formula *Constraints = FlowConditionConstraints.lookup(Val: Token);
313 if (Constraints == nullptr)
314 continue;
315 LC.TokenDefs[Token] = Constraints;
316
317 if (auto DepsIt = FlowConditionDeps.find(Val: Token);
318 DepsIt != FlowConditionDeps.end())
319 LC.TokenDeps[Token] = DepsIt->second;
320 }
321
322 return LC;
323}
324
325void DataflowAnalysisContext::initLogicalContext(SimpleLogicalContext LC) {
326 Invariant = LC.Invariant;
327 FlowConditionConstraints = std::move(LC.TokenDefs);
328 // TODO: The dependencies in `LC.TokenDeps` can be reconstructed from
329 // `LC.TokenDefs`. Give the caller the option to reconstruct, rather than
330 // providing them directly, to save caller space (memory/disk).
331 FlowConditionDeps = std::move(LC.TokenDeps);
332}
333
334static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
335 llvm::raw_ostream &OS) {
336 OS << "(";
337 for (size_t i = 0; i < Atoms.size(); ++i) {
338 OS << Atoms[i];
339 if (i + 1 < Atoms.size())
340 OS << ", ";
341 }
342 OS << ")\n";
343}
344
345void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
346 llvm::raw_ostream &OS) {
347 llvm::SetVector<const Formula *> Constraints;
348 Constraints.insert(X: &arena().makeAtomRef(A: Token));
349 addTransitiveFlowConditionConstraints(Token, Constraints);
350
351 OS << "Flow condition token: " << Token << "\n";
352 SimplifyConstraintsInfo Info;
353 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
354 simplifyConstraints(Constraints, arena&: arena(), Info: &Info);
355 if (!Constraints.empty()) {
356 OS << "Constraints:\n";
357 for (const auto *Constraint : Constraints) {
358 Constraint->print(OS);
359 OS << "\n";
360 }
361 }
362 if (!Info.TrueAtoms.empty()) {
363 OS << "True atoms: ";
364 printAtomList(Atoms: Info.TrueAtoms, OS);
365 }
366 if (!Info.FalseAtoms.empty()) {
367 OS << "False atoms: ";
368 printAtomList(Atoms: Info.FalseAtoms, OS);
369 }
370 if (!Info.EquivalentAtoms.empty()) {
371 OS << "Equivalent atoms:\n";
372 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
373 printAtomList(Atoms: Class, OS);
374 }
375
376 OS << "\nFlow condition constraints before simplification:\n";
377 for (const auto *Constraint : OriginalConstraints) {
378 Constraint->print(OS);
379 OS << "\n";
380 }
381}
382
383const AdornedCFG *
384DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
385 // Canonicalize the key:
386 F = F->getDefinition();
387 if (F == nullptr)
388 return nullptr;
389 auto It = FunctionContexts.find(Val: F);
390 if (It != FunctionContexts.end())
391 return &It->second;
392
393 if (F->doesThisDeclarationHaveABody()) {
394 auto ACFG = AdornedCFG::build(Func: *F);
395 // FIXME: Handle errors.
396 assert(ACFG);
397 auto Result = FunctionContexts.insert(KV: {F, std::move(*ACFG)});
398 return &Result.first->second;
399 }
400
401 return nullptr;
402}
403
404static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
405 if (DataflowLog.empty())
406 return Logger::textual(llvm::errs());
407
408 llvm::StringRef Dir = DataflowLog;
409 if (auto EC = llvm::sys::fs::create_directories(path: Dir))
410 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
411 // All analysis runs within a process will log to the same directory.
412 // Share a counter so they don't all overwrite each other's 0.html.
413 // (Don't share a logger, it's not threadsafe).
414 static std::atomic<unsigned> Counter = {0};
415 auto StreamFactory =
416 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
417 llvm::SmallString<256> File(Dir);
418 llvm::sys::path::append(path&: File,
419 a: std::to_string(val: Counter.fetch_add(i: 1)) + ".html");
420 std::error_code EC;
421 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: File, args&: EC);
422 if (EC) {
423 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
424 << "\n";
425 return std::make_unique<llvm::raw_null_ostream>();
426 }
427 return OS;
428 };
429 return Logger::html(std::move(StreamFactory));
430}
431
432DataflowAnalysisContext::DataflowAnalysisContext(
433 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
434 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
435 Opts(Opts) {
436 // If the -dataflow-log command-line flag was set, synthesize a logger.
437 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
438 // based tools.
439 if (Opts.Log == nullptr) {
440 if (DataflowLog.getNumOccurrences() > 0) {
441 LogOwner = makeLoggerFromCommandLine();
442 this->Opts.Log = LogOwner.get();
443 // FIXME: if the flag is given a value, write an HTML log to a file.
444 } else {
445 this->Opts.Log = &Logger::null();
446 }
447 }
448}
449
450DataflowAnalysisContext::~DataflowAnalysisContext() = default;
451
452} // namespace dataflow
453} // namespace clang
454