1//===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DataflowAnalysisContext class that owns objects that
10// encompass the state of a program and stores context that is used during
11// dataflow analysis.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16#include "clang/AST/ExprCXX.h"
17#include "clang/Analysis/FlowSensitive/ASTOps.h"
18#include "clang/Analysis/FlowSensitive/DebugSupport.h"
19#include "clang/Analysis/FlowSensitive/Formula.h"
20#include "clang/Analysis/FlowSensitive/Logger.h"
21#include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
22#include "clang/Analysis/FlowSensitive/Value.h"
23#include "llvm/ADT/SetOperations.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/raw_ostream.h"
30#include <cassert>
31#include <memory>
32#include <string>
33#include <utility>
34#include <vector>
35
36static llvm::cl::opt<std::string> DataflowLog(
37 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
38 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
39 "log to stderr. With an arg, writes HTML logs under the "
40 "specified directory (one per analyzed function)."));
41
42namespace clang {
43namespace dataflow {
44
45FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
46 // During context-sensitive analysis, a struct may be allocated in one
47 // function, but its field accessed in a function lower in the stack than
48 // the allocation. Since we only collect fields used in the function where
49 // the allocation occurs, we can't apply that filter when performing
50 // context-sensitive analysis. But, this only applies to storage locations,
51 // since field access it not allowed to fail. In contrast, field *values*
52 // don't need this allowance, since the API allows for uninitialized fields.
53 if (Opts.ContextSensitiveOpts)
54 return getObjectFields(Type);
55
56 return llvm::set_intersection(S1: getObjectFields(Type), S2: ModeledFields);
57}
58
59void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
60 ModeledFields.set_union(Fields);
61}
62
63StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
64 if (!Type.isNull() && Type->isRecordType()) {
65 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
66 for (const FieldDecl *Field : getModeledFields(Type))
67 if (Field->getType()->isReferenceType())
68 FieldLocs.insert(KV: {Field, nullptr});
69 else
70 FieldLocs.insert(KV: {Field, &createStorageLocation(
71 Type: Field->getType().getNonReferenceType())});
72
73 RecordStorageLocation::SyntheticFieldMap SyntheticFields;
74 for (const auto &Entry : getSyntheticFields(Type))
75 SyntheticFields.insert(
76 KV: {Entry.getKey(),
77 &createStorageLocation(Type: Entry.getValue().getNonReferenceType())});
78
79 return createRecordStorageLocation(Type, FieldLocs: std::move(FieldLocs),
80 SyntheticFields: std::move(SyntheticFields));
81 }
82 return arena().create<ScalarStorageLocation>(args&: Type);
83}
84
85// Returns the keys for a given `StringMap`.
86// Can't use `StringSet` as the return type as it doesn't support `operator==`.
87template <typename T>
88static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
89 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
90}
91
92RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
93 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
94 RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
95 assert(Type->isRecordType());
96 assert(containsSameFields(getModeledFields(Type), FieldLocs));
97 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
98
99 RecordStorageLocationCreated = true;
100 return arena().create<RecordStorageLocation>(args&: Type, args: std::move(FieldLocs),
101 args: std::move(SyntheticFields));
102}
103
104StorageLocation &
105DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
106 if (auto *Loc = DeclToLoc.lookup(Val: &D))
107 return *Loc;
108 auto &Loc = createStorageLocation(Type: D.getType().getNonReferenceType());
109 DeclToLoc[&D] = &Loc;
110 return Loc;
111}
112
113StorageLocation &
114DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
115 const Expr &CanonE = ignoreCFGOmittedNodes(E);
116
117 if (auto *Loc = ExprToLoc.lookup(Val: &CanonE))
118 return *Loc;
119 auto &Loc = createStorageLocation(Type: CanonE.getType());
120 ExprToLoc[&CanonE] = &Loc;
121 return Loc;
122}
123
124PointerValue &
125DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
126 auto CanonicalPointeeType =
127 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
128 auto Res = NullPointerVals.try_emplace(Key: CanonicalPointeeType, Args: nullptr);
129 if (Res.second) {
130 auto &PointeeLoc = createStorageLocation(Type: CanonicalPointeeType);
131 Res.first->second = &arena().create<PointerValue>(args&: PointeeLoc);
132 }
133 return *Res.first->second;
134}
135
136void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
137 if (Invariant == nullptr)
138 Invariant = &Constraint;
139 else
140 Invariant = &arena().makeAnd(LHS: *Invariant, RHS: Constraint);
141}
142
143void DataflowAnalysisContext::addFlowConditionConstraint(
144 Atom Token, const Formula &Constraint) {
145 auto Res = FlowConditionConstraints.try_emplace(Key: Token, Args: &Constraint);
146 if (!Res.second) {
147 Res.first->second =
148 &arena().makeAnd(LHS: *Res.first->second, RHS: Constraint);
149 }
150}
151
152Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
153 Atom ForkToken = arena().makeFlowConditionToken();
154 FlowConditionDeps[ForkToken].insert(V: Token);
155 addFlowConditionConstraint(Token: ForkToken, Constraint: arena().makeAtomRef(A: Token));
156 return ForkToken;
157}
158
159Atom
160DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
161 Atom SecondToken) {
162 Atom Token = arena().makeFlowConditionToken();
163 FlowConditionDeps[Token].insert(V: FirstToken);
164 FlowConditionDeps[Token].insert(V: SecondToken);
165 addFlowConditionConstraint(Token,
166 Constraint: arena().makeOr(LHS: arena().makeAtomRef(A: FirstToken),
167 RHS: arena().makeAtomRef(A: SecondToken)));
168 return Token;
169}
170
171Solver::Result DataflowAnalysisContext::querySolver(
172 llvm::SetVector<const Formula *> Constraints) {
173 return S.solve(Vals: Constraints.getArrayRef());
174}
175
176bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
177 const Formula &F) {
178 if (F.isLiteral(b: true))
179 return true;
180
181 // Returns true if and only if truth assignment of the flow condition implies
182 // that `F` is also true. We prove whether or not this property holds by
183 // reducing the problem to satisfiability checking. In other words, we attempt
184 // to show that assuming `F` is false makes the constraints induced by the
185 // flow condition unsatisfiable.
186 llvm::SetVector<const Formula *> Constraints;
187 Constraints.insert(X: &arena().makeAtomRef(A: Token));
188 Constraints.insert(X: &arena().makeNot(Val: F));
189 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
190 return isUnsatisfiable(Constraints: std::move(Constraints));
191}
192
193bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
194 const Formula &F) {
195 if (F.isLiteral(b: false))
196 return false;
197
198 llvm::SetVector<const Formula *> Constraints;
199 Constraints.insert(X: &arena().makeAtomRef(A: Token));
200 Constraints.insert(X: &F);
201 addTransitiveFlowConditionConstraints(Token, Out&: Constraints);
202 return isSatisfiable(Constraints: std::move(Constraints));
203}
204
205bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
206 const Formula &Val2) {
207 llvm::SetVector<const Formula *> Constraints;
208 Constraints.insert(X: &arena().makeNot(Val: arena().makeEquals(LHS: Val1, RHS: Val2)));
209 return isUnsatisfiable(Constraints: std::move(Constraints));
210}
211
212void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
213 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
214 llvm::DenseSet<Atom> AddedTokens;
215 std::vector<Atom> Remaining = {Token};
216
217 if (Invariant)
218 Constraints.insert(X: Invariant);
219 // Define all the flow conditions that might be referenced in constraints.
220 while (!Remaining.empty()) {
221 auto Token = Remaining.back();
222 Remaining.pop_back();
223 if (!AddedTokens.insert(V: Token).second)
224 continue;
225
226 auto ConstraintsIt = FlowConditionConstraints.find(Val: Token);
227 if (ConstraintsIt == FlowConditionConstraints.end()) {
228 Constraints.insert(X: &arena().makeAtomRef(A: Token));
229 } else {
230 // Bind flow condition token via `iff` to its set of constraints:
231 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
232 Constraints.insert(X: &arena().makeEquals(LHS: arena().makeAtomRef(A: Token),
233 RHS: *ConstraintsIt->second));
234 }
235
236 if (auto DepsIt = FlowConditionDeps.find(Val: Token);
237 DepsIt != FlowConditionDeps.end())
238 for (Atom A : DepsIt->second)
239 Remaining.push_back(x: A);
240 }
241}
242
243static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
244 llvm::raw_ostream &OS) {
245 OS << "(";
246 for (size_t i = 0; i < Atoms.size(); ++i) {
247 OS << Atoms[i];
248 if (i + 1 < Atoms.size())
249 OS << ", ";
250 }
251 OS << ")\n";
252}
253
254void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
255 llvm::raw_ostream &OS) {
256 llvm::SetVector<const Formula *> Constraints;
257 Constraints.insert(X: &arena().makeAtomRef(A: Token));
258 addTransitiveFlowConditionConstraints(Token, Constraints);
259
260 OS << "Flow condition token: " << Token << "\n";
261 SimplifyConstraintsInfo Info;
262 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
263 simplifyConstraints(Constraints, arena&: arena(), Info: &Info);
264 if (!Constraints.empty()) {
265 OS << "Constraints:\n";
266 for (const auto *Constraint : Constraints) {
267 Constraint->print(OS);
268 OS << "\n";
269 }
270 }
271 if (!Info.TrueAtoms.empty()) {
272 OS << "True atoms: ";
273 printAtomList(Atoms: Info.TrueAtoms, OS);
274 }
275 if (!Info.FalseAtoms.empty()) {
276 OS << "False atoms: ";
277 printAtomList(Atoms: Info.FalseAtoms, OS);
278 }
279 if (!Info.EquivalentAtoms.empty()) {
280 OS << "Equivalent atoms:\n";
281 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
282 printAtomList(Atoms: Class, OS);
283 }
284
285 OS << "\nFlow condition constraints before simplification:\n";
286 for (const auto *Constraint : OriginalConstraints) {
287 Constraint->print(OS);
288 OS << "\n";
289 }
290}
291
292const AdornedCFG *
293DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
294 // Canonicalize the key:
295 F = F->getDefinition();
296 if (F == nullptr)
297 return nullptr;
298 auto It = FunctionContexts.find(Val: F);
299 if (It != FunctionContexts.end())
300 return &It->second;
301
302 if (F->doesThisDeclarationHaveABody()) {
303 auto ACFG = AdornedCFG::build(Func: *F);
304 // FIXME: Handle errors.
305 assert(ACFG);
306 auto Result = FunctionContexts.insert(KV: {F, std::move(*ACFG)});
307 return &Result.first->second;
308 }
309
310 return nullptr;
311}
312
313static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
314 if (DataflowLog.empty())
315 return Logger::textual(llvm::errs());
316
317 llvm::StringRef Dir = DataflowLog;
318 if (auto EC = llvm::sys::fs::create_directories(path: Dir))
319 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
320 // All analysis runs within a process will log to the same directory.
321 // Share a counter so they don't all overwrite each other's 0.html.
322 // (Don't share a logger, it's not threadsafe).
323 static std::atomic<unsigned> Counter = {0};
324 auto StreamFactory =
325 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
326 llvm::SmallString<256> File(Dir);
327 llvm::sys::path::append(path&: File,
328 a: std::to_string(val: Counter.fetch_add(i: 1)) + ".html");
329 std::error_code EC;
330 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: File, args&: EC);
331 if (EC) {
332 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
333 << "\n";
334 return std::make_unique<llvm::raw_null_ostream>();
335 }
336 return OS;
337 };
338 return Logger::html(std::move(StreamFactory));
339}
340
341DataflowAnalysisContext::DataflowAnalysisContext(
342 Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
343 : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
344 Opts(Opts) {
345 // If the -dataflow-log command-line flag was set, synthesize a logger.
346 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
347 // based tools.
348 if (Opts.Log == nullptr) {
349 if (DataflowLog.getNumOccurrences() > 0) {
350 LogOwner = makeLoggerFromCommandLine();
351 this->Opts.Log = LogOwner.get();
352 // FIXME: if the flag is given a value, write an HTML log to a file.
353 } else {
354 this->Opts.Log = &Logger::null();
355 }
356 }
357}
358
359DataflowAnalysisContext::~DataflowAnalysisContext() = default;
360
361} // namespace dataflow
362} // namespace clang
363