| 1 | //===- WatchedLiteralsSolver.cpp --------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines a SAT solver implementation that can be used by dataflow |
| 10 | // analyses. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include <cassert> |
| 15 | #include <vector> |
| 16 | |
| 17 | #include "clang/Analysis/FlowSensitive/CNFFormula.h" |
| 18 | #include "clang/Analysis/FlowSensitive/Formula.h" |
| 19 | #include "clang/Analysis/FlowSensitive/Solver.h" |
| 20 | #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" |
| 21 | #include "llvm/ADT/ArrayRef.h" |
| 22 | #include "llvm/ADT/DenseMap.h" |
| 23 | #include "llvm/ADT/DenseSet.h" |
| 24 | #include "llvm/ADT/STLExtras.h" |
| 25 | |
| 26 | |
| 27 | namespace clang { |
| 28 | namespace dataflow { |
| 29 | |
| 30 | namespace { |
| 31 | |
| 32 | class WatchedLiteralsSolverImpl { |
| 33 | /// Stores the variable identifier and Atom for atomic booleans in the |
| 34 | /// formula. |
| 35 | llvm::DenseMap<Variable, Atom> Atomics; |
| 36 | |
| 37 | /// A boolean formula in conjunctive normal form that the solver will attempt |
| 38 | /// to prove satisfiable. The formula will be modified in the process. |
| 39 | CNFFormula CNF; |
| 40 | |
| 41 | /// Maps literals (indices of the vector) to clause identifiers (elements of |
| 42 | /// the vector) that watch the respective literals. |
| 43 | /// |
| 44 | /// For a given clause, its watched literal is always its first literal in |
| 45 | /// `Clauses`. This invariant is maintained when watched literals change. |
| 46 | std::vector<ClauseID> WatchedHead; |
| 47 | |
| 48 | /// Maps clause identifiers (elements of the vector) to identifiers of other |
| 49 | /// clauses that watch the same literals, forming a set of linked lists. |
| 50 | /// |
| 51 | /// The element at index 0 stands for the identifier of the clause that |
| 52 | /// follows the null clause. It is set to 0 and isn't used. Identifiers of |
| 53 | /// clauses in the formula start from the element at index 1. |
| 54 | std::vector<ClauseID> NextWatched; |
| 55 | |
| 56 | /// The search for a satisfying assignment of the variables in `Formula` will |
| 57 | /// proceed in levels, starting from 1 and going up to `Formula.LargestVar` |
| 58 | /// (inclusive). The current level is stored in `Level`. At each level the |
| 59 | /// solver will assign a value to an unassigned variable. If this leads to a |
| 60 | /// consistent partial assignment, `Level` will be incremented. Otherwise, if |
| 61 | /// it results in a conflict, the solver will backtrack by decrementing |
| 62 | /// `Level` until it reaches the most recent level where a decision was made. |
| 63 | size_t Level = 0; |
| 64 | |
| 65 | /// Maps levels (indices of the vector) to variables (elements of the vector) |
| 66 | /// that are assigned values at the respective levels. |
| 67 | /// |
| 68 | /// The element at index 0 isn't used. Variables start from the element at |
| 69 | /// index 1. |
| 70 | std::vector<Variable> LevelVars; |
| 71 | |
| 72 | /// State of the solver at a particular level. |
| 73 | enum class State : uint8_t { |
| 74 | /// Indicates that the solver made a decision. |
| 75 | Decision = 0, |
| 76 | |
| 77 | /// Indicates that the solver made a forced move. |
| 78 | Forced = 1, |
| 79 | }; |
| 80 | |
| 81 | /// State of the solver at a particular level. It keeps track of previous |
| 82 | /// decisions that the solver can refer to when backtracking. |
| 83 | /// |
| 84 | /// The element at index 0 isn't used. States start from the element at index |
| 85 | /// 1. |
| 86 | std::vector<State> LevelStates; |
| 87 | |
| 88 | enum class Assignment : int8_t { |
| 89 | Unassigned = -1, |
| 90 | AssignedFalse = 0, |
| 91 | AssignedTrue = 1 |
| 92 | }; |
| 93 | |
| 94 | /// Maps variables (indices of the vector) to their assignments (elements of |
| 95 | /// the vector). |
| 96 | /// |
| 97 | /// The element at index 0 isn't used. Variable assignments start from the |
| 98 | /// element at index 1. |
| 99 | std::vector<Assignment> VarAssignments; |
| 100 | |
| 101 | /// A set of unassigned variables that appear in watched literals in |
| 102 | /// `Formula`. The vector is guaranteed to contain unique elements. |
| 103 | std::vector<Variable> ActiveVars; |
| 104 | |
| 105 | public: |
| 106 | explicit WatchedLiteralsSolverImpl( |
| 107 | const llvm::ArrayRef<const Formula *> &Vals) |
| 108 | // `Atomics` needs to be initialized first so that we can use it as an |
| 109 | // output argument of `buildCNF()`. |
| 110 | : Atomics(), CNF(buildCNF(Formulas: Vals, Atomics)), |
| 111 | LevelVars(CNF.largestVar() + 1), LevelStates(CNF.largestVar() + 1) { |
| 112 | assert(!Vals.empty()); |
| 113 | |
| 114 | // Skip initialization if the formula is known to be contradictory. |
| 115 | if (CNF.knownContradictory()) |
| 116 | return; |
| 117 | |
| 118 | // Initialize `NextWatched` and `WatchedHead`. |
| 119 | NextWatched.push_back(x: 0); |
| 120 | const size_t NumLiterals = 2 * CNF.largestVar() + 1; |
| 121 | WatchedHead.resize(new_size: NumLiterals + 1, x: 0); |
| 122 | for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { |
| 123 | // Designate the first literal as the "watched" literal of the clause. |
| 124 | Literal FirstLit = CNF.clauseLiterals(C).front(); |
| 125 | NextWatched.push_back(x: WatchedHead[FirstLit]); |
| 126 | WatchedHead[FirstLit] = C; |
| 127 | } |
| 128 | |
| 129 | // Initialize the state at the root level to a decision so that in |
| 130 | // `reverseForcedMoves` we don't have to check that `Level >= 0` on each |
| 131 | // iteration. |
| 132 | LevelStates[0] = State::Decision; |
| 133 | |
| 134 | // Initialize all variables as unassigned. |
| 135 | VarAssignments.resize(new_size: CNF.largestVar() + 1, x: Assignment::Unassigned); |
| 136 | |
| 137 | // Initialize the active variables. |
| 138 | for (Variable Var = CNF.largestVar(); Var != NullVar; --Var) { |
| 139 | if (isWatched(Lit: posLit(V: Var)) || isWatched(Lit: negLit(V: Var))) |
| 140 | ActiveVars.push_back(x: Var); |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | // Returns the `Result` and the number of iterations "remaining" from |
| 145 | // `MaxIterations` (that is, `MaxIterations` - iterations in this call). |
| 146 | std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && { |
| 147 | if (CNF.knownContradictory()) { |
| 148 | // Short-cut the solving process. We already found out at CNF |
| 149 | // construction time that the formula is unsatisfiable. |
| 150 | return std::make_pair(x: Solver::Result::Unsatisfiable(), y&: MaxIterations); |
| 151 | } |
| 152 | size_t I = 0; |
| 153 | while (I < ActiveVars.size()) { |
| 154 | if (MaxIterations == 0) |
| 155 | return std::make_pair(x: Solver::Result::TimedOut(), y: 0); |
| 156 | --MaxIterations; |
| 157 | |
| 158 | // Assert that the following invariants hold: |
| 159 | // 1. All active variables are unassigned. |
| 160 | // 2. All active variables form watched literals. |
| 161 | // 3. Unassigned variables that form watched literals are active. |
| 162 | // FIXME: Consider replacing these with test cases that fail if the any |
| 163 | // of the invariants is broken. That might not be easy due to the |
| 164 | // transformations performed by `buildCNF`. |
| 165 | assert(activeVarsAreUnassigned()); |
| 166 | assert(activeVarsFormWatchedLiterals()); |
| 167 | assert(unassignedVarsFormingWatchedLiteralsAreActive()); |
| 168 | |
| 169 | const Variable ActiveVar = ActiveVars[I]; |
| 170 | |
| 171 | // Look for unit clauses that contain the active variable. |
| 172 | const bool unitPosLit = watchedByUnitClause(Lit: posLit(V: ActiveVar)); |
| 173 | const bool unitNegLit = watchedByUnitClause(Lit: negLit(V: ActiveVar)); |
| 174 | if (unitPosLit && unitNegLit) { |
| 175 | // We found a conflict! |
| 176 | |
| 177 | // Backtrack and rewind the `Level` until the most recent non-forced |
| 178 | // assignment. |
| 179 | reverseForcedMoves(); |
| 180 | |
| 181 | // If the root level is reached, then all possible assignments lead to |
| 182 | // a conflict. |
| 183 | if (Level == 0) |
| 184 | return std::make_pair(x: Solver::Result::Unsatisfiable(), y&: MaxIterations); |
| 185 | |
| 186 | // Otherwise, take the other branch at the most recent level where a |
| 187 | // decision was made. |
| 188 | LevelStates[Level] = State::Forced; |
| 189 | const Variable Var = LevelVars[Level]; |
| 190 | VarAssignments[Var] = VarAssignments[Var] == Assignment::AssignedTrue |
| 191 | ? Assignment::AssignedFalse |
| 192 | : Assignment::AssignedTrue; |
| 193 | |
| 194 | updateWatchedLiterals(); |
| 195 | } else if (unitPosLit || unitNegLit) { |
| 196 | // We found a unit clause! The value of its unassigned variable is |
| 197 | // forced. |
| 198 | ++Level; |
| 199 | |
| 200 | LevelVars[Level] = ActiveVar; |
| 201 | LevelStates[Level] = State::Forced; |
| 202 | VarAssignments[ActiveVar] = |
| 203 | unitPosLit ? Assignment::AssignedTrue : Assignment::AssignedFalse; |
| 204 | |
| 205 | // Remove the variable that was just assigned from the set of active |
| 206 | // variables. |
| 207 | if (I + 1 < ActiveVars.size()) { |
| 208 | // Replace the variable that was just assigned with the last active |
| 209 | // variable for efficient removal. |
| 210 | ActiveVars[I] = ActiveVars.back(); |
| 211 | } else { |
| 212 | // This was the last active variable. Repeat the process from the |
| 213 | // beginning. |
| 214 | I = 0; |
| 215 | } |
| 216 | ActiveVars.pop_back(); |
| 217 | |
| 218 | updateWatchedLiterals(); |
| 219 | } else if (I + 1 == ActiveVars.size()) { |
| 220 | // There are no remaining unit clauses in the formula! Make a decision |
| 221 | // for one of the active variables at the current level. |
| 222 | ++Level; |
| 223 | |
| 224 | LevelVars[Level] = ActiveVar; |
| 225 | LevelStates[Level] = State::Decision; |
| 226 | VarAssignments[ActiveVar] = decideAssignment(Var: ActiveVar); |
| 227 | |
| 228 | // Remove the variable that was just assigned from the set of active |
| 229 | // variables. |
| 230 | ActiveVars.pop_back(); |
| 231 | |
| 232 | updateWatchedLiterals(); |
| 233 | |
| 234 | // This was the last active variable. Repeat the process from the |
| 235 | // beginning. |
| 236 | I = 0; |
| 237 | } else { |
| 238 | ++I; |
| 239 | } |
| 240 | } |
| 241 | return std::make_pair(x: Solver::Result::Satisfiable(Solution: buildSolution()), |
| 242 | y&: MaxIterations); |
| 243 | } |
| 244 | |
| 245 | private: |
| 246 | /// Returns a satisfying truth assignment to the atoms in the boolean formula. |
| 247 | llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() { |
| 248 | llvm::DenseMap<Atom, Solver::Result::Assignment> Solution; |
| 249 | for (auto &Atomic : Atomics) { |
| 250 | // A variable may have a definite true/false assignment, or it may be |
| 251 | // unassigned indicating its truth value does not affect the result of |
| 252 | // the formula. Unassigned variables are assigned to true as a default. |
| 253 | Solution[Atomic.second] = |
| 254 | VarAssignments[Atomic.first] == Assignment::AssignedFalse |
| 255 | ? Solver::Result::Assignment::AssignedFalse |
| 256 | : Solver::Result::Assignment::AssignedTrue; |
| 257 | } |
| 258 | return Solution; |
| 259 | } |
| 260 | |
| 261 | /// Reverses forced moves until the most recent level where a decision was |
| 262 | /// made on the assignment of a variable. |
| 263 | void reverseForcedMoves() { |
| 264 | for (; LevelStates[Level] == State::Forced; --Level) { |
| 265 | const Variable Var = LevelVars[Level]; |
| 266 | |
| 267 | VarAssignments[Var] = Assignment::Unassigned; |
| 268 | |
| 269 | // If the variable that we pass through is watched then we add it to the |
| 270 | // active variables. |
| 271 | if (isWatched(Lit: posLit(V: Var)) || isWatched(Lit: negLit(V: Var))) |
| 272 | ActiveVars.push_back(x: Var); |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | /// Updates watched literals that are affected by a variable assignment. |
| 277 | void updateWatchedLiterals() { |
| 278 | const Variable Var = LevelVars[Level]; |
| 279 | |
| 280 | // Update the watched literals of clauses that currently watch the literal |
| 281 | // that falsifies `Var`. |
| 282 | const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue |
| 283 | ? negLit(V: Var) |
| 284 | : posLit(V: Var); |
| 285 | ClauseID FalseLitWatcher = WatchedHead[FalseLit]; |
| 286 | WatchedHead[FalseLit] = NullClause; |
| 287 | while (FalseLitWatcher != NullClause) { |
| 288 | const ClauseID NextFalseLitWatcher = NextWatched[FalseLitWatcher]; |
| 289 | |
| 290 | // Pick the first non-false literal as the new watched literal. |
| 291 | const CNFFormula::Iterator FalseLitWatcherStart = |
| 292 | CNF.startOfClause(C: FalseLitWatcher); |
| 293 | CNFFormula::Iterator NewWatchedLitIter = FalseLitWatcherStart.next(); |
| 294 | while (isCurrentlyFalse(Lit: *NewWatchedLitIter)) |
| 295 | ++NewWatchedLitIter; |
| 296 | const Literal NewWatchedLit = *NewWatchedLitIter; |
| 297 | const Variable NewWatchedLitVar = var(L: NewWatchedLit); |
| 298 | |
| 299 | // Swap the old watched literal for the new one in `FalseLitWatcher` to |
| 300 | // maintain the invariant that the watched literal is at the beginning of |
| 301 | // the clause. |
| 302 | *NewWatchedLitIter = FalseLit; |
| 303 | *FalseLitWatcherStart = NewWatchedLit; |
| 304 | |
| 305 | // If the new watched literal isn't watched by any other clause and its |
| 306 | // variable isn't assigned we need to add it to the active variables. |
| 307 | if (!isWatched(Lit: NewWatchedLit) && !isWatched(Lit: notLit(L: NewWatchedLit)) && |
| 308 | VarAssignments[NewWatchedLitVar] == Assignment::Unassigned) |
| 309 | ActiveVars.push_back(x: NewWatchedLitVar); |
| 310 | |
| 311 | NextWatched[FalseLitWatcher] = WatchedHead[NewWatchedLit]; |
| 312 | WatchedHead[NewWatchedLit] = FalseLitWatcher; |
| 313 | |
| 314 | // Go to the next clause that watches `FalseLit`. |
| 315 | FalseLitWatcher = NextFalseLitWatcher; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | /// Returns true if and only if one of the clauses that watch `Lit` is a unit |
| 320 | /// clause. |
| 321 | bool watchedByUnitClause(Literal Lit) const { |
| 322 | for (ClauseID LitWatcher = WatchedHead[Lit]; LitWatcher != NullClause; |
| 323 | LitWatcher = NextWatched[LitWatcher]) { |
| 324 | llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(C: LitWatcher); |
| 325 | |
| 326 | // Assert the invariant that the watched literal is always the first one |
| 327 | // in the clause. |
| 328 | // FIXME: Consider replacing this with a test case that fails if the |
| 329 | // invariant is broken by `updateWatchedLiterals`. That might not be easy |
| 330 | // due to the transformations performed by `buildCNF`. |
| 331 | assert(Clause.front() == Lit); |
| 332 | |
| 333 | if (isUnit(Clause)) |
| 334 | return true; |
| 335 | } |
| 336 | return false; |
| 337 | } |
| 338 | |
| 339 | /// Returns true if and only if `Clause` is a unit clause. |
| 340 | bool isUnit(llvm::ArrayRef<Literal> Clause) const { |
| 341 | return llvm::all_of(Range: Clause.drop_front(), |
| 342 | P: [this](Literal L) { return isCurrentlyFalse(Lit: L); }); |
| 343 | } |
| 344 | |
| 345 | /// Returns true if and only if `Lit` evaluates to `false` in the current |
| 346 | /// partial assignment. |
| 347 | bool isCurrentlyFalse(Literal Lit) const { |
| 348 | return static_cast<int8_t>(VarAssignments[var(L: Lit)]) == |
| 349 | static_cast<int8_t>(Lit & 1); |
| 350 | } |
| 351 | |
| 352 | /// Returns true if and only if `Lit` is watched by a clause in `Formula`. |
| 353 | bool isWatched(Literal Lit) const { return WatchedHead[Lit] != NullClause; } |
| 354 | |
| 355 | /// Returns an assignment for an unassigned variable. |
| 356 | Assignment decideAssignment(Variable Var) const { |
| 357 | return !isWatched(Lit: posLit(V: Var)) || isWatched(Lit: negLit(V: Var)) |
| 358 | ? Assignment::AssignedFalse |
| 359 | : Assignment::AssignedTrue; |
| 360 | } |
| 361 | |
| 362 | /// Returns a set of all watched literals. |
| 363 | llvm::DenseSet<Literal> watchedLiterals() const { |
| 364 | llvm::DenseSet<Literal> WatchedLiterals; |
| 365 | for (Literal Lit = 2; Lit < WatchedHead.size(); Lit++) { |
| 366 | if (WatchedHead[Lit] == NullClause) |
| 367 | continue; |
| 368 | WatchedLiterals.insert(V: Lit); |
| 369 | } |
| 370 | return WatchedLiterals; |
| 371 | } |
| 372 | |
| 373 | /// Returns true if and only if all active variables are unassigned. |
| 374 | bool activeVarsAreUnassigned() const { |
| 375 | return llvm::all_of(Range: ActiveVars, P: [this](Variable Var) { |
| 376 | return VarAssignments[Var] == Assignment::Unassigned; |
| 377 | }); |
| 378 | } |
| 379 | |
| 380 | /// Returns true if and only if all active variables form watched literals. |
| 381 | bool activeVarsFormWatchedLiterals() const { |
| 382 | const llvm::DenseSet<Literal> WatchedLiterals = watchedLiterals(); |
| 383 | return llvm::all_of(Range: ActiveVars, P: [&WatchedLiterals](Variable Var) { |
| 384 | return WatchedLiterals.contains(V: posLit(V: Var)) || |
| 385 | WatchedLiterals.contains(V: negLit(V: Var)); |
| 386 | }); |
| 387 | } |
| 388 | |
| 389 | /// Returns true if and only if all unassigned variables that are forming |
| 390 | /// watched literals are active. |
| 391 | bool unassignedVarsFormingWatchedLiteralsAreActive() const { |
| 392 | const llvm::DenseSet<Variable> (ActiveVars.begin(), |
| 393 | ActiveVars.end()); |
| 394 | for (Literal Lit : watchedLiterals()) { |
| 395 | const Variable Var = var(L: Lit); |
| 396 | if (VarAssignments[Var] != Assignment::Unassigned) |
| 397 | continue; |
| 398 | if (ActiveVarsSet.contains(V: Var)) |
| 399 | continue; |
| 400 | return false; |
| 401 | } |
| 402 | return true; |
| 403 | } |
| 404 | }; |
| 405 | |
| 406 | } // namespace |
| 407 | |
| 408 | Solver::Result |
| 409 | WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) { |
| 410 | if (Vals.empty()) |
| 411 | return Solver::Result::Satisfiable(Solution: {{}}); |
| 412 | auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations); |
| 413 | MaxIterations = Iterations; |
| 414 | return Res; |
| 415 | } |
| 416 | |
| 417 | } // namespace dataflow |
| 418 | } // namespace clang |
| 419 | |