1//===- CNFFormula.cpp -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// A representation of a boolean formula in 3-CNF.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Analysis/FlowSensitive/CNFFormula.h"
14#include "llvm/ADT/DenseSet.h"
15
16#include <queue>
17
18namespace clang {
19namespace dataflow {
20
21namespace {
22
23/// Applies simplifications while building up a BooleanFormula.
24/// We keep track of unit clauses, which tell us variables that must be
25/// true/false in any model that satisfies the overall formula.
26/// Such variables can be dropped from subsequently-added clauses, which
27/// may in turn yield more unit clauses or even a contradiction.
28/// The total added complexity of this preprocessing is O(N) where we
29/// for every clause, we do a lookup for each unit clauses.
30/// The lookup is O(1) on average. This method won't catch all
31/// contradictory formulas, more passes can in principle catch
32/// more cases but we leave all these and the general case to the
33/// proper SAT solver.
34struct CNFFormulaBuilder {
35 // Formula should outlive CNFFormulaBuilder.
36 explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {}
37
38 /// Adds the `L1 v ... v Ln` clause to the formula. Applies
39 /// simplifications, based on single-literal clauses.
40 ///
41 /// Requirements:
42 ///
43 /// `Li` must not be `NullLit`.
44 ///
45 /// All literals must be distinct.
46 void addClause(ArrayRef<Literal> Literals) {
47 // We generate clauses with up to 3 literals in this file.
48 assert(!Literals.empty() && Literals.size() <= 3);
49 // Contains literals of the simplified clause.
50 llvm::SmallVector<Literal> Simplified;
51 for (auto L : Literals) {
52 assert(L != NullLit &&
53 llvm::all_of(Simplified, [L](Literal S) { return S != L; }));
54 auto X = var(L);
55 if (trueVars.contains(V: X)) { // X must be true
56 if (isPosLit(L))
57 return; // Omit clause `(... v X v ...)`, it is `true`.
58 else
59 continue; // Omit `!X` from `(... v !X v ...)`.
60 }
61 if (falseVars.contains(V: X)) { // X must be false
62 if (isNegLit(L))
63 return; // Omit clause `(... v !X v ...)`, it is `true`.
64 else
65 continue; // Omit `X` from `(... v X v ...)`.
66 }
67 Simplified.push_back(Elt: L);
68 }
69 if (Simplified.empty()) {
70 // Simplification made the clause empty, which is equivalent to `false`.
71 // We already know that this formula is unsatisfiable.
72 Formula.addClause(lits: Simplified);
73 return;
74 }
75 if (Simplified.size() == 1) {
76 // We have new unit clause.
77 const Literal lit = Simplified.front();
78 const Variable v = var(L: lit);
79 if (isPosLit(L: lit))
80 trueVars.insert(V: v);
81 else
82 falseVars.insert(V: v);
83 }
84 Formula.addClause(lits: Simplified);
85 }
86
87 /// Returns true if we observed a contradiction while adding clauses.
88 /// In this case then the formula is already known to be unsatisfiable.
89 bool isKnownContradictory() { return Formula.knownContradictory(); }
90
91private:
92 CNFFormula &Formula;
93 llvm::DenseSet<Variable> trueVars;
94 llvm::DenseSet<Variable> falseVars;
95};
96
97} // namespace
98
99CNFFormula::CNFFormula(Variable LargestVar)
100 : LargestVar(LargestVar), KnownContradictory(false) {
101 Clauses.push_back(x: 0);
102 ClauseStarts.push_back(x: 0);
103}
104
105void CNFFormula::addClause(ArrayRef<Literal> lits) {
106 assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
107
108 if (lits.empty())
109 KnownContradictory = true;
110
111 const size_t S = Clauses.size();
112 ClauseStarts.push_back(x: S);
113 Clauses.insert(position: Clauses.end(), first: lits.begin(), last: lits.end());
114}
115
116CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
117 llvm::DenseMap<Variable, Atom> &Atomics) {
118 // The general strategy of the algorithm implemented below is to map each
119 // of the sub-values in `Vals` to a unique variable and use these variables in
120 // the resulting CNF expression to avoid exponential blow up. The number of
121 // literals in the resulting formula is guaranteed to be linear in the number
122 // of sub-formulas in `Vals`.
123
124 // Map each sub-formula in `Vals` to a unique variable.
125 llvm::DenseMap<const Formula *, Variable> FormulaToVar;
126 // Store variable identifiers and Atom of atomic booleans.
127 Variable NextVar = 1;
128 {
129 std::queue<const Formula *> UnprocessedFormulas;
130 for (const Formula *F : Formulas)
131 UnprocessedFormulas.push(x: F);
132 while (!UnprocessedFormulas.empty()) {
133 Variable Var = NextVar;
134 const Formula *F = UnprocessedFormulas.front();
135 UnprocessedFormulas.pop();
136
137 if (!FormulaToVar.try_emplace(Key: F, Args&: Var).second)
138 continue;
139 ++NextVar;
140
141 for (const Formula *Op : F->operands())
142 UnprocessedFormulas.push(x: Op);
143 if (F->kind() == Formula::AtomRef)
144 Atomics[Var] = F->getAtom();
145 }
146 }
147
148 auto GetVar = [&FormulaToVar](const Formula *F) {
149 auto ValIt = FormulaToVar.find(Val: F);
150 assert(ValIt != FormulaToVar.end());
151 return ValIt->second;
152 };
153
154 CNFFormula CNF(NextVar - 1);
155 std::vector<bool> ProcessedSubVals(NextVar, false);
156 CNFFormulaBuilder builder(CNF);
157
158 // Add a conjunct for each variable that represents a top-level conjunction
159 // value in `Vals`.
160 for (const Formula *F : Formulas)
161 builder.addClause(Literals: posLit(V: GetVar(F)));
162
163 // Add conjuncts that represent the mapping between newly-created variables
164 // and their corresponding sub-formulas.
165 std::queue<const Formula *> UnprocessedFormulas;
166 for (const Formula *F : Formulas)
167 UnprocessedFormulas.push(x: F);
168 while (!UnprocessedFormulas.empty()) {
169 const Formula *F = UnprocessedFormulas.front();
170 UnprocessedFormulas.pop();
171 const Variable Var = GetVar(F);
172
173 if (ProcessedSubVals[Var])
174 continue;
175 ProcessedSubVals[Var] = true;
176
177 switch (F->kind()) {
178 case Formula::AtomRef:
179 break;
180 case Formula::Literal:
181 CNF.addClause(lits: F->literal() ? posLit(V: Var) : negLit(V: Var));
182 break;
183 case Formula::And: {
184 const Variable LHS = GetVar(F->operands()[0]);
185 const Variable RHS = GetVar(F->operands()[1]);
186
187 if (LHS == RHS) {
188 // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
189 // already in conjunctive normal form. Below we add each of the
190 // conjuncts of the latter expression to the result.
191 builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)});
192 builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)});
193 } else {
194 // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
195 // !B)` which is already in conjunctive normal form. Below we add each
196 // of the conjuncts of the latter expression to the result.
197 builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)});
198 builder.addClause(Literals: {negLit(V: Var), posLit(V: RHS)});
199 builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)});
200 }
201 break;
202 }
203 case Formula::Or: {
204 const Variable LHS = GetVar(F->operands()[0]);
205 const Variable RHS = GetVar(F->operands()[1]);
206
207 if (LHS == RHS) {
208 // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
209 // already in conjunctive normal form. Below we add each of the
210 // conjuncts of the latter expression to the result.
211 builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)});
212 builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)});
213 } else {
214 // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
215 // !B)` which is already in conjunctive normal form. Below we add each
216 // of the conjuncts of the latter expression to the result.
217 builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), posLit(V: RHS)});
218 builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)});
219 builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)});
220 }
221 break;
222 }
223 case Formula::Not: {
224 const Variable Operand = GetVar(F->operands()[0]);
225
226 // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
227 // already in conjunctive normal form. Below we add each of the
228 // conjuncts of the latter expression to the result.
229 builder.addClause(Literals: {negLit(V: Var), negLit(V: Operand)});
230 builder.addClause(Literals: {posLit(V: Var), posLit(V: Operand)});
231 break;
232 }
233 case Formula::Implies: {
234 const Variable LHS = GetVar(F->operands()[0]);
235 const Variable RHS = GetVar(F->operands()[1]);
236
237 // `X <=> (A => B)` is equivalent to
238 // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
239 // conjunctive normal form. Below we add each of the conjuncts of
240 // the latter expression to the result.
241 builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS)});
242 builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)});
243 builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)});
244 break;
245 }
246 case Formula::Equal: {
247 const Variable LHS = GetVar(F->operands()[0]);
248 const Variable RHS = GetVar(F->operands()[1]);
249
250 if (LHS == RHS) {
251 // `X <=> (A <=> A)` is equivalent to `X` which is already in
252 // conjunctive normal form. Below we add each of the conjuncts of the
253 // latter expression to the result.
254 builder.addClause(Literals: posLit(V: Var));
255
256 // No need to visit the sub-values of `Val`.
257 continue;
258 }
259 // `X <=> (A <=> B)` is equivalent to
260 // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
261 // is already in conjunctive normal form. Below we add each of the
262 // conjuncts of the latter expression to the result.
263 builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS), posLit(V: RHS)});
264 builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)});
265 builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), negLit(V: RHS)});
266 builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)});
267 break;
268 }
269 }
270 if (builder.isKnownContradictory()) {
271 return CNF;
272 }
273 for (const Formula *Child : F->operands())
274 UnprocessedFormulas.push(x: Child);
275 }
276
277 // Unit clauses that were added later were not
278 // considered for the simplification of earlier clauses. Do a final
279 // pass to find more opportunities for simplification.
280 CNFFormula FinalCNF(NextVar - 1);
281 CNFFormulaBuilder FinalBuilder(FinalCNF);
282
283 // Collect unit clauses.
284 for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
285 if (CNF.clauseSize(C) == 1) {
286 FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)[0]);
287 }
288 }
289
290 // Add all clauses that were added previously, preserving the order.
291 for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
292 FinalBuilder.addClause(Literals: CNF.clauseLiterals(C));
293 if (FinalBuilder.isKnownContradictory()) {
294 break;
295 }
296 }
297 // It is possible there were new unit clauses again, but
298 // we stop here and leave the rest to the solver algorithm.
299 return FinalCNF;
300}
301
302} // namespace dataflow
303} // namespace clang
304