1 | //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a DataflowAnalysisContext class that owns objects that |
10 | // encompass the state of a program and stores context that is used during |
11 | // dataflow analysis. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" |
16 | #include "clang/AST/ExprCXX.h" |
17 | #include "clang/Analysis/FlowSensitive/ASTOps.h" |
18 | #include "clang/Analysis/FlowSensitive/DebugSupport.h" |
19 | #include "clang/Analysis/FlowSensitive/Formula.h" |
20 | #include "clang/Analysis/FlowSensitive/Logger.h" |
21 | #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" |
22 | #include "clang/Analysis/FlowSensitive/Value.h" |
23 | #include "llvm/ADT/SetOperations.h" |
24 | #include "llvm/ADT/SetVector.h" |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/Debug.h" |
27 | #include "llvm/Support/FileSystem.h" |
28 | #include "llvm/Support/Path.h" |
29 | #include "llvm/Support/raw_ostream.h" |
30 | #include <cassert> |
31 | #include <memory> |
32 | #include <string> |
33 | #include <utility> |
34 | #include <vector> |
35 | |
36 | static llvm::cl::opt<std::string> DataflowLog( |
37 | "dataflow-log" , llvm::cl::Hidden, llvm::cl::ValueOptional, |
38 | llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " |
39 | "log to stderr. With an arg, writes HTML logs under the " |
40 | "specified directory (one per analyzed function)." )); |
41 | |
42 | namespace clang { |
43 | namespace dataflow { |
44 | |
45 | FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { |
46 | // During context-sensitive analysis, a struct may be allocated in one |
47 | // function, but its field accessed in a function lower in the stack than |
48 | // the allocation. Since we only collect fields used in the function where |
49 | // the allocation occurs, we can't apply that filter when performing |
50 | // context-sensitive analysis. But, this only applies to storage locations, |
51 | // since field access it not allowed to fail. In contrast, field *values* |
52 | // don't need this allowance, since the API allows for uninitialized fields. |
53 | if (Opts.ContextSensitiveOpts) |
54 | return getObjectFields(Type); |
55 | |
56 | return llvm::set_intersection(S1: getObjectFields(Type), S2: ModeledFields); |
57 | } |
58 | |
59 | void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { |
60 | ModeledFields.set_union(Fields); |
61 | } |
62 | |
63 | StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { |
64 | if (!Type.isNull() && Type->isRecordType()) { |
65 | llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; |
66 | for (const FieldDecl *Field : getModeledFields(Type)) |
67 | if (Field->getType()->isReferenceType()) |
68 | FieldLocs.insert(KV: {Field, nullptr}); |
69 | else |
70 | FieldLocs.insert(KV: {Field, &createStorageLocation( |
71 | Type: Field->getType().getNonReferenceType())}); |
72 | |
73 | RecordStorageLocation::SyntheticFieldMap SyntheticFields; |
74 | for (const auto &Entry : getSyntheticFields(Type)) |
75 | SyntheticFields.insert( |
76 | KV: {Entry.getKey(), |
77 | &createStorageLocation(Type: Entry.getValue().getNonReferenceType())}); |
78 | |
79 | return createRecordStorageLocation(Type, FieldLocs: std::move(FieldLocs), |
80 | SyntheticFields: std::move(SyntheticFields)); |
81 | } |
82 | return arena().create<ScalarStorageLocation>(args&: Type); |
83 | } |
84 | |
85 | // Returns the keys for a given `StringMap`. |
86 | // Can't use `StringSet` as the return type as it doesn't support `operator==`. |
87 | template <typename T> |
88 | static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { |
89 | return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); |
90 | } |
91 | |
92 | RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( |
93 | QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, |
94 | RecordStorageLocation::SyntheticFieldMap SyntheticFields) { |
95 | assert(Type->isRecordType()); |
96 | assert(containsSameFields(getModeledFields(Type), FieldLocs)); |
97 | assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); |
98 | |
99 | RecordStorageLocationCreated = true; |
100 | return arena().create<RecordStorageLocation>(args&: Type, args: std::move(FieldLocs), |
101 | args: std::move(SyntheticFields)); |
102 | } |
103 | |
104 | StorageLocation & |
105 | DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { |
106 | if (auto *Loc = DeclToLoc.lookup(Val: &D)) |
107 | return *Loc; |
108 | auto &Loc = createStorageLocation(Type: D.getType().getNonReferenceType()); |
109 | DeclToLoc[&D] = &Loc; |
110 | return Loc; |
111 | } |
112 | |
113 | StorageLocation & |
114 | DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { |
115 | const Expr &CanonE = ignoreCFGOmittedNodes(E); |
116 | |
117 | if (auto *Loc = ExprToLoc.lookup(Val: &CanonE)) |
118 | return *Loc; |
119 | auto &Loc = createStorageLocation(Type: CanonE.getType()); |
120 | ExprToLoc[&CanonE] = &Loc; |
121 | return Loc; |
122 | } |
123 | |
124 | PointerValue & |
125 | DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { |
126 | auto CanonicalPointeeType = |
127 | PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); |
128 | auto Res = NullPointerVals.try_emplace(Key: CanonicalPointeeType, Args: nullptr); |
129 | if (Res.second) { |
130 | auto &PointeeLoc = createStorageLocation(Type: CanonicalPointeeType); |
131 | Res.first->second = &arena().create<PointerValue>(args&: PointeeLoc); |
132 | } |
133 | return *Res.first->second; |
134 | } |
135 | |
136 | void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { |
137 | if (Invariant == nullptr) |
138 | Invariant = &Constraint; |
139 | else |
140 | Invariant = &arena().makeAnd(LHS: *Invariant, RHS: Constraint); |
141 | } |
142 | |
143 | void DataflowAnalysisContext::addFlowConditionConstraint( |
144 | Atom Token, const Formula &Constraint) { |
145 | auto Res = FlowConditionConstraints.try_emplace(Key: Token, Args: &Constraint); |
146 | if (!Res.second) { |
147 | Res.first->second = |
148 | &arena().makeAnd(LHS: *Res.first->second, RHS: Constraint); |
149 | } |
150 | } |
151 | |
152 | Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { |
153 | Atom ForkToken = arena().makeFlowConditionToken(); |
154 | FlowConditionDeps[ForkToken].insert(V: Token); |
155 | addFlowConditionConstraint(Token: ForkToken, Constraint: arena().makeAtomRef(A: Token)); |
156 | return ForkToken; |
157 | } |
158 | |
159 | Atom |
160 | DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, |
161 | Atom SecondToken) { |
162 | Atom Token = arena().makeFlowConditionToken(); |
163 | FlowConditionDeps[Token].insert(V: FirstToken); |
164 | FlowConditionDeps[Token].insert(V: SecondToken); |
165 | addFlowConditionConstraint(Token, |
166 | Constraint: arena().makeOr(LHS: arena().makeAtomRef(A: FirstToken), |
167 | RHS: arena().makeAtomRef(A: SecondToken))); |
168 | return Token; |
169 | } |
170 | |
171 | Solver::Result DataflowAnalysisContext::querySolver( |
172 | llvm::SetVector<const Formula *> Constraints) { |
173 | return S.solve(Vals: Constraints.getArrayRef()); |
174 | } |
175 | |
176 | bool DataflowAnalysisContext::flowConditionImplies(Atom Token, |
177 | const Formula &F) { |
178 | if (F.isLiteral(b: true)) |
179 | return true; |
180 | |
181 | // Returns true if and only if truth assignment of the flow condition implies |
182 | // that `F` is also true. We prove whether or not this property holds by |
183 | // reducing the problem to satisfiability checking. In other words, we attempt |
184 | // to show that assuming `F` is false makes the constraints induced by the |
185 | // flow condition unsatisfiable. |
186 | llvm::SetVector<const Formula *> Constraints; |
187 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
188 | Constraints.insert(X: &arena().makeNot(Val: F)); |
189 | addTransitiveFlowConditionConstraints(Token, Out&: Constraints); |
190 | return isUnsatisfiable(Constraints: std::move(Constraints)); |
191 | } |
192 | |
193 | bool DataflowAnalysisContext::flowConditionAllows(Atom Token, |
194 | const Formula &F) { |
195 | if (F.isLiteral(b: false)) |
196 | return false; |
197 | |
198 | llvm::SetVector<const Formula *> Constraints; |
199 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
200 | Constraints.insert(X: &F); |
201 | addTransitiveFlowConditionConstraints(Token, Out&: Constraints); |
202 | return isSatisfiable(Constraints: std::move(Constraints)); |
203 | } |
204 | |
205 | bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, |
206 | const Formula &Val2) { |
207 | llvm::SetVector<const Formula *> Constraints; |
208 | Constraints.insert(X: &arena().makeNot(Val: arena().makeEquals(LHS: Val1, RHS: Val2))); |
209 | return isUnsatisfiable(Constraints: std::move(Constraints)); |
210 | } |
211 | |
212 | void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( |
213 | Atom Token, llvm::SetVector<const Formula *> &Constraints) { |
214 | llvm::DenseSet<Atom> AddedTokens; |
215 | std::vector<Atom> Remaining = {Token}; |
216 | |
217 | if (Invariant) |
218 | Constraints.insert(X: Invariant); |
219 | // Define all the flow conditions that might be referenced in constraints. |
220 | while (!Remaining.empty()) { |
221 | auto Token = Remaining.back(); |
222 | Remaining.pop_back(); |
223 | if (!AddedTokens.insert(V: Token).second) |
224 | continue; |
225 | |
226 | auto ConstraintsIt = FlowConditionConstraints.find(Val: Token); |
227 | if (ConstraintsIt == FlowConditionConstraints.end()) { |
228 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
229 | } else { |
230 | // Bind flow condition token via `iff` to its set of constraints: |
231 | // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints |
232 | Constraints.insert(X: &arena().makeEquals(LHS: arena().makeAtomRef(A: Token), |
233 | RHS: *ConstraintsIt->second)); |
234 | } |
235 | |
236 | if (auto DepsIt = FlowConditionDeps.find(Val: Token); |
237 | DepsIt != FlowConditionDeps.end()) |
238 | for (Atom A : DepsIt->second) |
239 | Remaining.push_back(x: A); |
240 | } |
241 | } |
242 | |
243 | static void printAtomList(const llvm::SmallVector<Atom> &Atoms, |
244 | llvm::raw_ostream &OS) { |
245 | OS << "(" ; |
246 | for (size_t i = 0; i < Atoms.size(); ++i) { |
247 | OS << Atoms[i]; |
248 | if (i + 1 < Atoms.size()) |
249 | OS << ", " ; |
250 | } |
251 | OS << ")\n" ; |
252 | } |
253 | |
254 | void DataflowAnalysisContext::dumpFlowCondition(Atom Token, |
255 | llvm::raw_ostream &OS) { |
256 | llvm::SetVector<const Formula *> Constraints; |
257 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
258 | addTransitiveFlowConditionConstraints(Token, Constraints); |
259 | |
260 | OS << "Flow condition token: " << Token << "\n" ; |
261 | SimplifyConstraintsInfo Info; |
262 | llvm::SetVector<const Formula *> OriginalConstraints = Constraints; |
263 | simplifyConstraints(Constraints, arena&: arena(), Info: &Info); |
264 | if (!Constraints.empty()) { |
265 | OS << "Constraints:\n" ; |
266 | for (const auto *Constraint : Constraints) { |
267 | Constraint->print(OS); |
268 | OS << "\n" ; |
269 | } |
270 | } |
271 | if (!Info.TrueAtoms.empty()) { |
272 | OS << "True atoms: " ; |
273 | printAtomList(Atoms: Info.TrueAtoms, OS); |
274 | } |
275 | if (!Info.FalseAtoms.empty()) { |
276 | OS << "False atoms: " ; |
277 | printAtomList(Atoms: Info.FalseAtoms, OS); |
278 | } |
279 | if (!Info.EquivalentAtoms.empty()) { |
280 | OS << "Equivalent atoms:\n" ; |
281 | for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) |
282 | printAtomList(Atoms: Class, OS); |
283 | } |
284 | |
285 | OS << "\nFlow condition constraints before simplification:\n" ; |
286 | for (const auto *Constraint : OriginalConstraints) { |
287 | Constraint->print(OS); |
288 | OS << "\n" ; |
289 | } |
290 | } |
291 | |
292 | const AdornedCFG * |
293 | DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { |
294 | // Canonicalize the key: |
295 | F = F->getDefinition(); |
296 | if (F == nullptr) |
297 | return nullptr; |
298 | auto It = FunctionContexts.find(Val: F); |
299 | if (It != FunctionContexts.end()) |
300 | return &It->second; |
301 | |
302 | if (F->doesThisDeclarationHaveABody()) { |
303 | auto ACFG = AdornedCFG::build(Func: *F); |
304 | // FIXME: Handle errors. |
305 | assert(ACFG); |
306 | auto Result = FunctionContexts.insert(KV: {F, std::move(*ACFG)}); |
307 | return &Result.first->second; |
308 | } |
309 | |
310 | return nullptr; |
311 | } |
312 | |
313 | static std::unique_ptr<Logger> makeLoggerFromCommandLine() { |
314 | if (DataflowLog.empty()) |
315 | return Logger::textual(llvm::errs()); |
316 | |
317 | llvm::StringRef Dir = DataflowLog; |
318 | if (auto EC = llvm::sys::fs::create_directories(path: Dir)) |
319 | llvm::errs() << "Failed to create log dir: " << EC.message() << "\n" ; |
320 | // All analysis runs within a process will log to the same directory. |
321 | // Share a counter so they don't all overwrite each other's 0.html. |
322 | // (Don't share a logger, it's not threadsafe). |
323 | static std::atomic<unsigned> Counter = {0}; |
324 | auto StreamFactory = |
325 | [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { |
326 | llvm::SmallString<256> File(Dir); |
327 | llvm::sys::path::append(path&: File, |
328 | a: std::to_string(val: Counter.fetch_add(i: 1)) + ".html" ); |
329 | std::error_code EC; |
330 | auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: File, args&: EC); |
331 | if (EC) { |
332 | llvm::errs() << "Failed to create log " << File << ": " << EC.message() |
333 | << "\n" ; |
334 | return std::make_unique<llvm::raw_null_ostream>(); |
335 | } |
336 | return OS; |
337 | }; |
338 | return Logger::html(std::move(StreamFactory)); |
339 | } |
340 | |
341 | DataflowAnalysisContext::DataflowAnalysisContext( |
342 | Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts) |
343 | : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()), |
344 | Opts(Opts) { |
345 | // If the -dataflow-log command-line flag was set, synthesize a logger. |
346 | // This is ugly but provides a uniform method for ad-hoc debugging dataflow- |
347 | // based tools. |
348 | if (Opts.Log == nullptr) { |
349 | if (DataflowLog.getNumOccurrences() > 0) { |
350 | LogOwner = makeLoggerFromCommandLine(); |
351 | this->Opts.Log = LogOwner.get(); |
352 | // FIXME: if the flag is given a value, write an HTML log to a file. |
353 | } else { |
354 | this->Opts.Log = &Logger::null(); |
355 | } |
356 | } |
357 | } |
358 | |
359 | DataflowAnalysisContext::~DataflowAnalysisContext() = default; |
360 | |
361 | } // namespace dataflow |
362 | } // namespace clang |
363 | |