| 1 | //===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // A representation of a boolean formula in 3-CNF. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "clang/Analysis/FlowSensitive/CNFFormula.h" |
| 14 | #include "llvm/ADT/DenseSet.h" |
| 15 | |
| 16 | #include <queue> |
| 17 | |
| 18 | namespace clang { |
| 19 | namespace dataflow { |
| 20 | |
| 21 | namespace { |
| 22 | |
| 23 | /// Applies simplifications while building up a BooleanFormula. |
| 24 | /// We keep track of unit clauses, which tell us variables that must be |
| 25 | /// true/false in any model that satisfies the overall formula. |
| 26 | /// Such variables can be dropped from subsequently-added clauses, which |
| 27 | /// may in turn yield more unit clauses or even a contradiction. |
| 28 | /// The total added complexity of this preprocessing is O(N) where we |
| 29 | /// for every clause, we do a lookup for each unit clauses. |
| 30 | /// The lookup is O(1) on average. This method won't catch all |
| 31 | /// contradictory formulas, more passes can in principle catch |
| 32 | /// more cases but we leave all these and the general case to the |
| 33 | /// proper SAT solver. |
| 34 | struct CNFFormulaBuilder { |
| 35 | // Formula should outlive CNFFormulaBuilder. |
| 36 | explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} |
| 37 | |
| 38 | /// Adds the `L1 v ... v Ln` clause to the formula. Applies |
| 39 | /// simplifications, based on single-literal clauses. |
| 40 | /// |
| 41 | /// Requirements: |
| 42 | /// |
| 43 | /// `Li` must not be `NullLit`. |
| 44 | /// |
| 45 | /// All literals must be distinct. |
| 46 | void addClause(ArrayRef<Literal> Literals) { |
| 47 | // We generate clauses with up to 3 literals in this file. |
| 48 | assert(!Literals.empty() && Literals.size() <= 3); |
| 49 | // Contains literals of the simplified clause. |
| 50 | llvm::SmallVector<Literal> Simplified; |
| 51 | for (auto L : Literals) { |
| 52 | assert(L != NullLit && !llvm::is_contained(Simplified, L)); |
| 53 | auto X = var(L); |
| 54 | if (trueVars.contains(V: X)) { // X must be true |
| 55 | if (isPosLit(L)) |
| 56 | return; // Omit clause `(... v X v ...)`, it is `true`. |
| 57 | else |
| 58 | continue; // Omit `!X` from `(... v !X v ...)`. |
| 59 | } |
| 60 | if (falseVars.contains(V: X)) { // X must be false |
| 61 | if (isNegLit(L)) |
| 62 | return; // Omit clause `(... v !X v ...)`, it is `true`. |
| 63 | else |
| 64 | continue; // Omit `X` from `(... v X v ...)`. |
| 65 | } |
| 66 | Simplified.push_back(Elt: L); |
| 67 | } |
| 68 | if (Simplified.empty()) { |
| 69 | // Simplification made the clause empty, which is equivalent to `false`. |
| 70 | // We already know that this formula is unsatisfiable. |
| 71 | Formula.addClause(lits: Simplified); |
| 72 | return; |
| 73 | } |
| 74 | if (Simplified.size() == 1) { |
| 75 | // We have new unit clause. |
| 76 | const Literal lit = Simplified.front(); |
| 77 | const Variable v = var(L: lit); |
| 78 | if (isPosLit(L: lit)) |
| 79 | trueVars.insert(V: v); |
| 80 | else |
| 81 | falseVars.insert(V: v); |
| 82 | } |
| 83 | Formula.addClause(lits: Simplified); |
| 84 | } |
| 85 | |
| 86 | /// Returns true if we observed a contradiction while adding clauses. |
| 87 | /// In this case then the formula is already known to be unsatisfiable. |
| 88 | bool isKnownContradictory() { return Formula.knownContradictory(); } |
| 89 | |
| 90 | private: |
| 91 | CNFFormula &Formula; |
| 92 | llvm::DenseSet<Variable> trueVars; |
| 93 | llvm::DenseSet<Variable> falseVars; |
| 94 | }; |
| 95 | |
| 96 | } // namespace |
| 97 | |
| 98 | CNFFormula::CNFFormula(Variable LargestVar) |
| 99 | : LargestVar(LargestVar), KnownContradictory(false) { |
| 100 | Clauses.push_back(x: 0); |
| 101 | ClauseStarts.push_back(x: 0); |
| 102 | } |
| 103 | |
| 104 | void CNFFormula::addClause(ArrayRef<Literal> lits) { |
| 105 | assert(!llvm::is_contained(lits, NullLit)); |
| 106 | |
| 107 | if (lits.empty()) |
| 108 | KnownContradictory = true; |
| 109 | |
| 110 | const size_t S = Clauses.size(); |
| 111 | ClauseStarts.push_back(x: S); |
| 112 | llvm::append_range(C&: Clauses, R&: lits); |
| 113 | } |
| 114 | |
| 115 | CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, |
| 116 | llvm::DenseMap<Variable, Atom> &Atomics) { |
| 117 | // The general strategy of the algorithm implemented below is to map each |
| 118 | // of the sub-values in `Vals` to a unique variable and use these variables in |
| 119 | // the resulting CNF expression to avoid exponential blow up. The number of |
| 120 | // literals in the resulting formula is guaranteed to be linear in the number |
| 121 | // of sub-formulas in `Vals`. |
| 122 | |
| 123 | // Map each sub-formula in `Vals` to a unique variable. |
| 124 | llvm::DenseMap<const Formula *, Variable> FormulaToVar; |
| 125 | // Store variable identifiers and Atom of atomic booleans. |
| 126 | Variable NextVar = 1; |
| 127 | { |
| 128 | std::queue<const Formula *> UnprocessedFormulas; |
| 129 | for (const Formula *F : Formulas) |
| 130 | UnprocessedFormulas.push(x: F); |
| 131 | while (!UnprocessedFormulas.empty()) { |
| 132 | Variable Var = NextVar; |
| 133 | const Formula *F = UnprocessedFormulas.front(); |
| 134 | UnprocessedFormulas.pop(); |
| 135 | |
| 136 | if (!FormulaToVar.try_emplace(Key: F, Args&: Var).second) |
| 137 | continue; |
| 138 | ++NextVar; |
| 139 | |
| 140 | for (const Formula *Op : F->operands()) |
| 141 | UnprocessedFormulas.push(x: Op); |
| 142 | if (F->kind() == Formula::AtomRef) |
| 143 | Atomics[Var] = F->getAtom(); |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | auto GetVar = [&FormulaToVar](const Formula *F) { |
| 148 | auto ValIt = FormulaToVar.find(Val: F); |
| 149 | assert(ValIt != FormulaToVar.end()); |
| 150 | return ValIt->second; |
| 151 | }; |
| 152 | |
| 153 | CNFFormula CNF(NextVar - 1); |
| 154 | std::vector<bool> ProcessedSubVals(NextVar, false); |
| 155 | CNFFormulaBuilder builder(CNF); |
| 156 | |
| 157 | // Add a conjunct for each variable that represents a top-level conjunction |
| 158 | // value in `Vals`. |
| 159 | for (const Formula *F : Formulas) |
| 160 | builder.addClause(Literals: posLit(V: GetVar(F))); |
| 161 | |
| 162 | // Add conjuncts that represent the mapping between newly-created variables |
| 163 | // and their corresponding sub-formulas. |
| 164 | std::queue<const Formula *> UnprocessedFormulas; |
| 165 | for (const Formula *F : Formulas) |
| 166 | UnprocessedFormulas.push(x: F); |
| 167 | while (!UnprocessedFormulas.empty()) { |
| 168 | const Formula *F = UnprocessedFormulas.front(); |
| 169 | UnprocessedFormulas.pop(); |
| 170 | const Variable Var = GetVar(F); |
| 171 | |
| 172 | if (ProcessedSubVals[Var]) |
| 173 | continue; |
| 174 | ProcessedSubVals[Var] = true; |
| 175 | |
| 176 | switch (F->kind()) { |
| 177 | case Formula::AtomRef: |
| 178 | break; |
| 179 | case Formula::Literal: |
| 180 | CNF.addClause(lits: F->literal() ? posLit(V: Var) : negLit(V: Var)); |
| 181 | break; |
| 182 | case Formula::And: { |
| 183 | const Variable LHS = GetVar(F->operands()[0]); |
| 184 | const Variable RHS = GetVar(F->operands()[1]); |
| 185 | |
| 186 | if (LHS == RHS) { |
| 187 | // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
| 188 | // already in conjunctive normal form. Below we add each of the |
| 189 | // conjuncts of the latter expression to the result. |
| 190 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
| 191 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
| 192 | } else { |
| 193 | // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v |
| 194 | // !B)` which is already in conjunctive normal form. Below we add each |
| 195 | // of the conjuncts of the latter expression to the result. |
| 196 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
| 197 | builder.addClause(Literals: {negLit(V: Var), posLit(V: RHS)}); |
| 198 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
| 199 | } |
| 200 | break; |
| 201 | } |
| 202 | case Formula::Or: { |
| 203 | const Variable LHS = GetVar(F->operands()[0]); |
| 204 | const Variable RHS = GetVar(F->operands()[1]); |
| 205 | |
| 206 | if (LHS == RHS) { |
| 207 | // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is |
| 208 | // already in conjunctive normal form. Below we add each of the |
| 209 | // conjuncts of the latter expression to the result. |
| 210 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS)}); |
| 211 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
| 212 | } else { |
| 213 | // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v |
| 214 | // !B)` which is already in conjunctive normal form. Below we add each |
| 215 | // of the conjuncts of the latter expression to the result. |
| 216 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
| 217 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS)}); |
| 218 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
| 219 | } |
| 220 | break; |
| 221 | } |
| 222 | case Formula::Not: { |
| 223 | const Variable Operand = GetVar(F->operands()[0]); |
| 224 | |
| 225 | // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is |
| 226 | // already in conjunctive normal form. Below we add each of the |
| 227 | // conjuncts of the latter expression to the result. |
| 228 | builder.addClause(Literals: {negLit(V: Var), negLit(V: Operand)}); |
| 229 | builder.addClause(Literals: {posLit(V: Var), posLit(V: Operand)}); |
| 230 | break; |
| 231 | } |
| 232 | case Formula::Implies: { |
| 233 | const Variable LHS = GetVar(F->operands()[0]); |
| 234 | const Variable RHS = GetVar(F->operands()[1]); |
| 235 | |
| 236 | // `X <=> (A => B)` is equivalent to |
| 237 | // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in |
| 238 | // conjunctive normal form. Below we add each of the conjuncts of |
| 239 | // the latter expression to the result. |
| 240 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS)}); |
| 241 | builder.addClause(Literals: {posLit(V: Var), negLit(V: RHS)}); |
| 242 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
| 243 | break; |
| 244 | } |
| 245 | case Formula::Equal: { |
| 246 | const Variable LHS = GetVar(F->operands()[0]); |
| 247 | const Variable RHS = GetVar(F->operands()[1]); |
| 248 | |
| 249 | if (LHS == RHS) { |
| 250 | // `X <=> (A <=> A)` is equivalent to `X` which is already in |
| 251 | // conjunctive normal form. Below we add each of the conjuncts of the |
| 252 | // latter expression to the result. |
| 253 | builder.addClause(Literals: posLit(V: Var)); |
| 254 | |
| 255 | // No need to visit the sub-values of `Val`. |
| 256 | continue; |
| 257 | } |
| 258 | // `X <=> (A <=> B)` is equivalent to |
| 259 | // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which |
| 260 | // is already in conjunctive normal form. Below we add each of the |
| 261 | // conjuncts of the latter expression to the result. |
| 262 | builder.addClause(Literals: {posLit(V: Var), posLit(V: LHS), posLit(V: RHS)}); |
| 263 | builder.addClause(Literals: {posLit(V: Var), negLit(V: LHS), negLit(V: RHS)}); |
| 264 | builder.addClause(Literals: {negLit(V: Var), posLit(V: LHS), negLit(V: RHS)}); |
| 265 | builder.addClause(Literals: {negLit(V: Var), negLit(V: LHS), posLit(V: RHS)}); |
| 266 | break; |
| 267 | } |
| 268 | } |
| 269 | if (builder.isKnownContradictory()) { |
| 270 | return CNF; |
| 271 | } |
| 272 | for (const Formula *Child : F->operands()) |
| 273 | UnprocessedFormulas.push(x: Child); |
| 274 | } |
| 275 | |
| 276 | // Unit clauses that were added later were not |
| 277 | // considered for the simplification of earlier clauses. Do a final |
| 278 | // pass to find more opportunities for simplification. |
| 279 | CNFFormula FinalCNF(NextVar - 1); |
| 280 | CNFFormulaBuilder FinalBuilder(FinalCNF); |
| 281 | |
| 282 | // Collect unit clauses. |
| 283 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
| 284 | if (CNF.clauseSize(C) == 1) { |
| 285 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)[0]); |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | // Add all clauses that were added previously, preserving the order. |
| 290 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
| 291 | FinalBuilder.addClause(Literals: CNF.clauseLiterals(C)); |
| 292 | if (FinalBuilder.isKnownContradictory()) { |
| 293 | break; |
| 294 | } |
| 295 | } |
| 296 | // It is possible there were new unit clauses again, but |
| 297 | // we stop here and leave the rest to the solver algorithm. |
| 298 | return FinalCNF; |
| 299 | } |
| 300 | |
| 301 | } // namespace dataflow |
| 302 | } // namespace clang |
| 303 | |