1 | //===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A representation of a boolean formula in 3-CNF. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Analysis/FlowSensitive/CNFFormula.h" |
14 | #include "llvm/ADT/DenseSet.h" |
15 | |
16 | #include <queue> |
17 | |
18 | namespace clang { |
19 | namespace dataflow { |
20 | |
21 | namespace { |
22 | |
23 | /// Applies simplifications while building up a BooleanFormula. |
24 | /// We keep track of unit clauses, which tell us variables that must be |
25 | /// true/false in any model that satisfies the overall formula. |
26 | /// Such variables can be dropped from subsequently-added clauses, which |
27 | /// may in turn yield more unit clauses or even a contradiction. |
28 | /// The total added complexity of this preprocessing is O(N) where we |
29 | /// for every clause, we do a lookup for each unit clauses. |
30 | /// The lookup is O(1) on average. This method won't catch all |
31 | /// contradictory formulas, more passes can in principle catch |
32 | /// more cases but we leave all these and the general case to the |
33 | /// proper SAT solver. |
34 | struct CNFFormulaBuilder { |
35 | // Formula should outlive CNFFormulaBuilder. |
36 | explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} |
37 | |
38 | /// Adds the `L1 v ... v Ln` clause to the formula. Applies |
39 | /// simplifications, based on single-literal clauses. |
40 | /// |
41 | /// Requirements: |
42 | /// |
43 | /// `Li` must not be `NullLit`. |
44 | /// |
45 | /// All literals must be distinct. |
46 | void addClause(ArrayRef<Literal> Literals) { |
47 | // We generate clauses with up to 3 literals in this file. |
48 | assert(!Literals.empty() && Literals.size() <= 3); |
49 | // Contains literals of the simplified clause. |
50 | llvm::SmallVector<Literal> Simplified; |
51 | for (auto L : Literals) { |
52 | assert(L != NullLit && !llvm::is_contained(Simplified, L)); |
53 | auto X = var(L); |
54 | if (trueVars.contains(V: X)) { // X must be true |
55 | if (isPosLit(L)) |
56 | return; // Omit clause `(... v X v ...)`, it is `true`. |
57 | else |
58 | continue; // Omit `!X` from `(... v !X v ...)`. |
59 | } |
60 | if (falseVars.contains(V: X)) { // X must be false |
61 | if (isNegLit(L)) |
62 | return; // Omit clause `(... v !X v ...)`, it is `true`. |
63 | else |
64 | continue; // Omit `X` from `(... v X v ...)`. |
65 | } |
66 | Simplified.push_back(Elt: L); |
67 | } |
68 | if (Simplified.empty()) { |
69 | // Simplification made the clause empty, which is equivalent to `false`. |
70 | // We already know that this formula is unsatisfiable. |
71 | Formula.addClause(lits: Simplified); |
72 | return; |
73 | } |
74 | if (Simplified.size() == 1) { |
75 | // We have new unit clause. |
76 | const Literal lit = Simplified.front(); |
77 | const Variable v = var(L: lit); |
78 | if (isPosLit(L: lit)) |
79 | trueVars.insert(V: v); |
80 | else |
81 | falseVars.insert(V: v); |
82 | } |
83 | Formula.addClause(lits: Simplified); |
84 | } |
85 | |
86 | /// Returns true if we observed a contradiction while adding clauses. |
87 | /// In this case then the formula is already known to be unsatisfiable. |
88 | bool isKnownContradictory() { return Formula.knownContradictory(); } |
89 | |
90 | private: |
91 | CNFFormula &Formula; |
92 | llvm::DenseSet<Variable> trueVars; |
93 | llvm::DenseSet<Variable> falseVars; |
94 | }; |
95 | |
96 | } // namespace |
97 | |
98 | CNFFormula::CNFFormula(Variable LargestVar) |
99 | : LargestVar(LargestVar), KnownContradictory(false) { |
100 | Clauses.push_back(x: 0); |
101 | ClauseStarts.push_back(x: 0); |
102 | } |
103 | |
104 | void CNFFormula::addClause(ArrayRef<Literal> lits) { |
105 | assert(!llvm::is_contained(lits, NullLit)); |
106 | |
107 | if (lits.empty()) |
108 | KnownContradictory = true; |
109 | |
110 | const size_t S = Clauses.size(); |
111 | ClauseStarts.push_back(x: S); |
112 | llvm::append_range(C&: Clauses, R&: lits); |
113 | } |
114 | |
115 | CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, |
116 | llvm::DenseMap<Variable, Atom> &Atomics) { |
117 | // The general strategy of the algorithm implemented below is to map each |
118 | // of the sub-values in `Vals` to a unique variable and use these variables in |
119 | // the resulting CNF expression to avoid exponential blow up. The number of |
120 | // literals in the resulting formula is guaranteed to be linear in the number |
121 | // of sub-formulas in `Vals`. |
122 | |
123 | // Map each sub-formula in `Vals` to a unique variable. |
124 | llvm::DenseMap<const Formula *, Variable> FormulaToVar; |
125 | // Store variable identifiers and Atom of atomic booleans. |
126 | Variable NextVar = 1; |
127 | { |
128 | std::queue<const Formula *> UnprocessedFormulas; |
129 | for (const Formula *F : Formulas) |
130 | UnprocessedFormulas.push(x: F); |
131 | while (!UnprocessedFormulas.empty()) { |
132 | Variable Var = NextVar; |
133 | const Formula *F = UnprocessedFormulas.front(); |
134 | UnprocessedFormulas.pop(); |
135 | |
136 | if (!FormulaToVar.try_emplace(Key: F, Args&: Var).second) |
137 | continue; |
138 | ++NextVar; |
139 | |
140 | for (const Formula *Op : F->operands()) |
141 | UnprocessedFormulas.push(x: Op); |
142 | if (F->kind() == Formula::AtomRef) |
143 | Atomics[Var] = F->getAtom(); |
144 | } |
145 | } |
146 | |
147 | auto GetVar = [&FormulaToVar](const Formula *F) { |
148 | auto ValIt = FormulaToVar.find(Val: F); |
149 | assert(ValIt != FormulaToVar.end()); |
150 | return ValIt->second; |
151 | }; |
152 | |
153 | CNFFormula CNF(NextVar - 1); |
154 | std::vector<bool> ProcessedSubVals(NextVar, false); |
155 | CNFFormulaBuilder builder(CNF); |
156 | |
157 | // Add a conjunct for each variable that represents a top-level conjunction |
158 | // value in `Vals`. |
159 | for (const Formula *F : Formulas) |
160 | builder.addClause(Literals: posLit(V: GetVar(F))); |
161 | |
162 | // Add conjuncts that represent the mapping between newly-created variables |
163 | // and their corresponding sub-formulas. |
164 | std::queue<const Formula *> UnprocessedFormulas; |
165 | for (const Formula *F : Formulas) |
166 | UnprocessedFormulas.push(x: F); |
167 | while (!UnprocessedFormulas.empty()) { |
168 | const Formula *F = UnprocessedFormulas.front(); |
169 | UnprocessedFormulas.pop(); |
170 | const Variable Var = GetVar(F); |
171 | |
172 | if (ProcessedSubVals[Var]) |
173 | continue; |
174 | ProcessedSubVals[Var] = true; |
175 | |
176 | switch (F->kind()) { |
177 | case Formula::AtomRef: |
178 | break; |
179 | case Formula::Literal: |
180 | CNF.addClause(lits: F->literal() ? posLit(V: Var) : negLit(V: Var)); |
181 | break; |
182 | case Formula::And: { |
183 | const Variable LHS = GetVar(F->operands()[0]); |
184 | const Variable RHS = GetVar(F->operands()[1]); |
185 | |
186 | if (LHS == RHS) { |
187 | // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
188 | // already in conjunctive normal form. Below we add each of the |
189 | // conjuncts of the latter expression to the result. |
190 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
191 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
192 | } else { |
193 | // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v |
194 | // !B)` which is already in conjunctive normal form. Below we add each |
195 | // of the conjuncts of the latter expression to the result. |
196 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
197 | builder.addClause(Literals: {negLit(V: Var), posLit(V: RHS)}); |
198 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
199 | } |
200 | break; |
201 | } |
202 | case Formula::Or: { |
203 | const Variable LHS = GetVar(F->operands()[0]); |
204 | const Variable RHS = GetVar(F->operands()[1]); |
205 | |
206 | if (LHS == RHS) { |
207 | // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
208 | // already in conjunctive normal form. Below we add each of the |
209 | // conjuncts of the latter expression to the result. |
210 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
211 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
212 | } else { |
213 | // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v |
214 | // !B)` which is already in conjunctive normal form. Below we add each |
215 | // of the conjuncts of the latter expression to the result. |
216 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
217 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
218 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
219 | } |
220 | break; |
221 | } |
222 | case Formula::Not: { |
223 | const Variable Operand = GetVar(F->operands()[0]); |
224 | |
225 | // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is |
226 | // already in conjunctive normal form. Below we add each of the |
227 | // conjuncts of the latter expression to the result. |
228 | builder.addClause(Literals: {negLit(V: Var), negLit(V: Operand)}); |
229 | builder.addClause(Literals: {posLit(V: Var), posLit(V: Operand)}); |
230 | break; |
231 | } |
232 | case Formula::Implies: { |
233 | const Variable LHS = GetVar(F->operands()[0]); |
234 | const Variable RHS = GetVar(F->operands()[1]); |
235 | |
236 | // `X <=> (A => B)` is equivalent to |
237 | // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in |
238 | // conjunctive normal form. Below we add each of the conjuncts of |
239 | // the latter expression to the result. |
240 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS)}); |
241 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
242 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
243 | break; |
244 | } |
245 | case Formula::Equal: { |
246 | const Variable LHS = GetVar(F->operands()[0]); |
247 | const Variable RHS = GetVar(F->operands()[1]); |
248 | |
249 | if (LHS == RHS) { |
250 | // `X <=> (A <=> A)` is equivalent to `X` which is already in |
251 | // conjunctive normal form. Below we add each of the conjuncts of the |
252 | // latter expression to the result. |
253 | builder.addClause(Literals: posLit(V: Var)); |
254 | |
255 | // No need to visit the sub-values of `Val`. |
256 | continue; |
257 | } |
258 | // `X <=> (A <=> B)` is equivalent to |
259 | // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which |
260 | // is already in conjunctive normal form. Below we add each of the |
261 | // conjuncts of the latter expression to the result. |
262 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
263 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
264 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), negLit(V: RHS)}); |
265 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
266 | break; |
267 | } |
268 | } |
269 | if (builder.isKnownContradictory()) { |
270 | return CNF; |
271 | } |
272 | for (const Formula *Child : F->operands()) |
273 | UnprocessedFormulas.push(x: Child); |
274 | } |
275 | |
276 | // Unit clauses that were added later were not |
277 | // considered for the simplification of earlier clauses. Do a final |
278 | // pass to find more opportunities for simplification. |
279 | CNFFormula FinalCNF(NextVar - 1); |
280 | CNFFormulaBuilder FinalBuilder(FinalCNF); |
281 | |
282 | // Collect unit clauses. |
283 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
284 | if (CNF.clauseSize(C) == 1) { |
285 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)[0]); |
286 | } |
287 | } |
288 | |
289 | // Add all clauses that were added previously, preserving the order. |
290 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
291 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)); |
292 | if (FinalBuilder.isKnownContradictory()) { |
293 | break; |
294 | } |
295 | } |
296 | // It is possible there were new unit clauses again, but |
297 | // we stop here and leave the rest to the solver algorithm. |
298 | return FinalCNF; |
299 | } |
300 | |
301 | } // namespace dataflow |
302 | } // namespace clang |
303 | |