1 | //===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A representation of a boolean formula in 3-CNF. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Analysis/FlowSensitive/CNFFormula.h" |
14 | #include "llvm/ADT/DenseSet.h" |
15 | |
16 | #include <queue> |
17 | |
18 | namespace clang { |
19 | namespace dataflow { |
20 | |
21 | namespace { |
22 | |
23 | /// Applies simplifications while building up a BooleanFormula. |
24 | /// We keep track of unit clauses, which tell us variables that must be |
25 | /// true/false in any model that satisfies the overall formula. |
26 | /// Such variables can be dropped from subsequently-added clauses, which |
27 | /// may in turn yield more unit clauses or even a contradiction. |
28 | /// The total added complexity of this preprocessing is O(N) where we |
29 | /// for every clause, we do a lookup for each unit clauses. |
30 | /// The lookup is O(1) on average. This method won't catch all |
31 | /// contradictory formulas, more passes can in principle catch |
32 | /// more cases but we leave all these and the general case to the |
33 | /// proper SAT solver. |
34 | struct CNFFormulaBuilder { |
35 | // Formula should outlive CNFFormulaBuilder. |
36 | explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} |
37 | |
38 | /// Adds the `L1 v ... v Ln` clause to the formula. Applies |
39 | /// simplifications, based on single-literal clauses. |
40 | /// |
41 | /// Requirements: |
42 | /// |
43 | /// `Li` must not be `NullLit`. |
44 | /// |
45 | /// All literals must be distinct. |
46 | void addClause(ArrayRef<Literal> Literals) { |
47 | // We generate clauses with up to 3 literals in this file. |
48 | assert(!Literals.empty() && Literals.size() <= 3); |
49 | // Contains literals of the simplified clause. |
50 | llvm::SmallVector<Literal> Simplified; |
51 | for (auto L : Literals) { |
52 | assert(L != NullLit && |
53 | llvm::all_of(Simplified, [L](Literal S) { return S != L; })); |
54 | auto X = var(L); |
55 | if (trueVars.contains(V: X)) { // X must be true |
56 | if (isPosLit(L)) |
57 | return; // Omit clause `(... v X v ...)`, it is `true`. |
58 | else |
59 | continue; // Omit `!X` from `(... v !X v ...)`. |
60 | } |
61 | if (falseVars.contains(V: X)) { // X must be false |
62 | if (isNegLit(L)) |
63 | return; // Omit clause `(... v !X v ...)`, it is `true`. |
64 | else |
65 | continue; // Omit `X` from `(... v X v ...)`. |
66 | } |
67 | Simplified.push_back(Elt: L); |
68 | } |
69 | if (Simplified.empty()) { |
70 | // Simplification made the clause empty, which is equivalent to `false`. |
71 | // We already know that this formula is unsatisfiable. |
72 | Formula.addClause(lits: Simplified); |
73 | return; |
74 | } |
75 | if (Simplified.size() == 1) { |
76 | // We have new unit clause. |
77 | const Literal lit = Simplified.front(); |
78 | const Variable v = var(L: lit); |
79 | if (isPosLit(L: lit)) |
80 | trueVars.insert(V: v); |
81 | else |
82 | falseVars.insert(V: v); |
83 | } |
84 | Formula.addClause(lits: Simplified); |
85 | } |
86 | |
87 | /// Returns true if we observed a contradiction while adding clauses. |
88 | /// In this case then the formula is already known to be unsatisfiable. |
89 | bool isKnownContradictory() { return Formula.knownContradictory(); } |
90 | |
91 | private: |
92 | CNFFormula &Formula; |
93 | llvm::DenseSet<Variable> trueVars; |
94 | llvm::DenseSet<Variable> falseVars; |
95 | }; |
96 | |
97 | } // namespace |
98 | |
99 | CNFFormula::CNFFormula(Variable LargestVar) |
100 | : LargestVar(LargestVar), KnownContradictory(false) { |
101 | Clauses.push_back(x: 0); |
102 | ClauseStarts.push_back(x: 0); |
103 | } |
104 | |
105 | void CNFFormula::addClause(ArrayRef<Literal> lits) { |
106 | assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); |
107 | |
108 | if (lits.empty()) |
109 | KnownContradictory = true; |
110 | |
111 | const size_t S = Clauses.size(); |
112 | ClauseStarts.push_back(x: S); |
113 | Clauses.insert(position: Clauses.end(), first: lits.begin(), last: lits.end()); |
114 | } |
115 | |
116 | CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, |
117 | llvm::DenseMap<Variable, Atom> &Atomics) { |
118 | // The general strategy of the algorithm implemented below is to map each |
119 | // of the sub-values in `Vals` to a unique variable and use these variables in |
120 | // the resulting CNF expression to avoid exponential blow up. The number of |
121 | // literals in the resulting formula is guaranteed to be linear in the number |
122 | // of sub-formulas in `Vals`. |
123 | |
124 | // Map each sub-formula in `Vals` to a unique variable. |
125 | llvm::DenseMap<const Formula *, Variable> FormulaToVar; |
126 | // Store variable identifiers and Atom of atomic booleans. |
127 | Variable NextVar = 1; |
128 | { |
129 | std::queue<const Formula *> UnprocessedFormulas; |
130 | for (const Formula *F : Formulas) |
131 | UnprocessedFormulas.push(x: F); |
132 | while (!UnprocessedFormulas.empty()) { |
133 | Variable Var = NextVar; |
134 | const Formula *F = UnprocessedFormulas.front(); |
135 | UnprocessedFormulas.pop(); |
136 | |
137 | if (!FormulaToVar.try_emplace(Key: F, Args&: Var).second) |
138 | continue; |
139 | ++NextVar; |
140 | |
141 | for (const Formula *Op : F->operands()) |
142 | UnprocessedFormulas.push(x: Op); |
143 | if (F->kind() == Formula::AtomRef) |
144 | Atomics[Var] = F->getAtom(); |
145 | } |
146 | } |
147 | |
148 | auto GetVar = [&FormulaToVar](const Formula *F) { |
149 | auto ValIt = FormulaToVar.find(Val: F); |
150 | assert(ValIt != FormulaToVar.end()); |
151 | return ValIt->second; |
152 | }; |
153 | |
154 | CNFFormula CNF(NextVar - 1); |
155 | std::vector<bool> ProcessedSubVals(NextVar, false); |
156 | CNFFormulaBuilder builder(CNF); |
157 | |
158 | // Add a conjunct for each variable that represents a top-level conjunction |
159 | // value in `Vals`. |
160 | for (const Formula *F : Formulas) |
161 | builder.addClause(Literals: posLit(V: GetVar(F))); |
162 | |
163 | // Add conjuncts that represent the mapping between newly-created variables |
164 | // and their corresponding sub-formulas. |
165 | std::queue<const Formula *> UnprocessedFormulas; |
166 | for (const Formula *F : Formulas) |
167 | UnprocessedFormulas.push(x: F); |
168 | while (!UnprocessedFormulas.empty()) { |
169 | const Formula *F = UnprocessedFormulas.front(); |
170 | UnprocessedFormulas.pop(); |
171 | const Variable Var = GetVar(F); |
172 | |
173 | if (ProcessedSubVals[Var]) |
174 | continue; |
175 | ProcessedSubVals[Var] = true; |
176 | |
177 | switch (F->kind()) { |
178 | case Formula::AtomRef: |
179 | break; |
180 | case Formula::Literal: |
181 | CNF.addClause(lits: F->literal() ? posLit(V: Var) : negLit(V: Var)); |
182 | break; |
183 | case Formula::And: { |
184 | const Variable LHS = GetVar(F->operands()[0]); |
185 | const Variable RHS = GetVar(F->operands()[1]); |
186 | |
187 | if (LHS == RHS) { |
188 | // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
189 | // already in conjunctive normal form. Below we add each of the |
190 | // conjuncts of the latter expression to the result. |
191 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
192 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
193 | } else { |
194 | // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v |
195 | // !B)` which is already in conjunctive normal form. Below we add each |
196 | // of the conjuncts of the latter expression to the result. |
197 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
198 | builder.addClause(Literals: {negLit(V: Var), posLit(V: RHS)}); |
199 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
200 | } |
201 | break; |
202 | } |
203 | case Formula::Or: { |
204 | const Variable LHS = GetVar(F->operands()[0]); |
205 | const Variable RHS = GetVar(F->operands()[1]); |
206 | |
207 | if (LHS == RHS) { |
208 | // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
209 | // already in conjunctive normal form. Below we add each of the |
210 | // conjuncts of the latter expression to the result. |
211 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
212 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
213 | } else { |
214 | // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v |
215 | // !B)` which is already in conjunctive normal form. Below we add each |
216 | // of the conjuncts of the latter expression to the result. |
217 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
218 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
219 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
220 | } |
221 | break; |
222 | } |
223 | case Formula::Not: { |
224 | const Variable Operand = GetVar(F->operands()[0]); |
225 | |
226 | // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is |
227 | // already in conjunctive normal form. Below we add each of the |
228 | // conjuncts of the latter expression to the result. |
229 | builder.addClause(Literals: {negLit(V: Var), negLit(V: Operand)}); |
230 | builder.addClause(Literals: {posLit(V: Var), posLit(V: Operand)}); |
231 | break; |
232 | } |
233 | case Formula::Implies: { |
234 | const Variable LHS = GetVar(F->operands()[0]); |
235 | const Variable RHS = GetVar(F->operands()[1]); |
236 | |
237 | // `X <=> (A => B)` is equivalent to |
238 | // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in |
239 | // conjunctive normal form. Below we add each of the conjuncts of |
240 | // the latter expression to the result. |
241 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS)}); |
242 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
243 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
244 | break; |
245 | } |
246 | case Formula::Equal: { |
247 | const Variable LHS = GetVar(F->operands()[0]); |
248 | const Variable RHS = GetVar(F->operands()[1]); |
249 | |
250 | if (LHS == RHS) { |
251 | // `X <=> (A <=> A)` is equivalent to `X` which is already in |
252 | // conjunctive normal form. Below we add each of the conjuncts of the |
253 | // latter expression to the result. |
254 | builder.addClause(Literals: posLit(V: Var)); |
255 | |
256 | // No need to visit the sub-values of `Val`. |
257 | continue; |
258 | } |
259 | // `X <=> (A <=> B)` is equivalent to |
260 | // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which |
261 | // is already in conjunctive normal form. Below we add each of the |
262 | // conjuncts of the latter expression to the result. |
263 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
264 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
265 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), negLit(V: RHS)}); |
266 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
267 | break; |
268 | } |
269 | } |
270 | if (builder.isKnownContradictory()) { |
271 | return CNF; |
272 | } |
273 | for (const Formula *Child : F->operands()) |
274 | UnprocessedFormulas.push(x: Child); |
275 | } |
276 | |
277 | // Unit clauses that were added later were not |
278 | // considered for the simplification of earlier clauses. Do a final |
279 | // pass to find more opportunities for simplification. |
280 | CNFFormula FinalCNF(NextVar - 1); |
281 | CNFFormulaBuilder FinalBuilder(FinalCNF); |
282 | |
283 | // Collect unit clauses. |
284 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
285 | if (CNF.clauseSize(C) == 1) { |
286 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)[0]); |
287 | } |
288 | } |
289 | |
290 | // Add all clauses that were added previously, preserving the order. |
291 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
292 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)); |
293 | if (FinalBuilder.isKnownContradictory()) { |
294 | break; |
295 | } |
296 | } |
297 | // It is possible there were new unit clauses again, but |
298 | // we stop here and leave the rest to the solver algorithm. |
299 | return FinalCNF; |
300 | } |
301 | |
302 | } // namespace dataflow |
303 | } // namespace clang |
304 | |