[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGb50c87d1e63f: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction… (authored by burakemir, committed by mboehme). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 Files: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp clang/unittests/Analysis/FlowSensitive/SolverTest.cpp Index: clang/unittests/Analysis/FlowSensitive/SolverTest.cpp === --- clang/unittests/Analysis/FlowSensitive/SolverTest.cpp +++ clang/unittests/Analysis/FlowSensitive/SolverTest.cpp @@ -369,4 +369,32 @@ EXPECT_TRUE(solver.reachedLimit()); } +TEST(SolverTest, SimpleButLargeContradiction) { + // This test ensures that the solver takes a short-cut on known + // contradictory inputs, without using max_iterations. At the time + // this test is added, formulas that are easily recognized to be + // contradictory at CNF construction time would lead to timeout. + WatchedLiteralsSolver solver(10); + ConstraintContext Ctx; + auto first = Ctx.atom(); + auto last = first; + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.atom()); + } + last = Ctx.conj(Ctx.neg(first), last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); + + first = Ctx.atom(); + last = Ctx.neg(first); + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.neg(Ctx.atom())); + } + last = Ctx.conj(first, last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); +} + } // namespace Index: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp === --- clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp +++ clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -12,8 +12,8 @@ //===--===// #include +#include #include -#include #include #include @@ -23,8 +23,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" + namespace clang { namespace dataflow { @@ -62,6 +64,10 @@ /// Returns the positive literal `V`. static constexpr Literal posLit(Variable V) { return 2 * V; } +static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); } + +static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); } + /// Returns the negative literal `!V`. static constexpr Literal negLit(Variable V) { return 2 * V + 1; } @@ -125,9 +131,14 @@ /// formula. llvm::DenseMap Atomics; + /// Indicates that we already know the formula is unsatisfiable. + /// During construction, we catch simple cases of conflicting unit-clauses. + bool KnownContradictory; + explicit CNFFormula(Variable LargestVar, llvm::DenseMap Atomics) - : LargestVar(LargestVar), Atomics(std::move(Atomics)) { + : LargestVar(LargestVar), Atomics(std::move(Atomics)), +KnownContradictory(false) { Clauses.push_back(0); ClauseStarts.push_back(0); NextWatched.push_back(0); @@ -135,33 +146,24 @@ WatchedHead.resize(NumLiterals + 1, 0); } - /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are - /// `NullLit` they are respectively omitted from the clause. - /// + /// Adds the `L1 v ... v Ln` clause to the formula. /// Requirements: /// - /// `L1` must not be `NullLit`. + /// `Li` must not be `NullLit`. /// /// All literals in the input that are not `NullLit` must be distinct. - void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { -// The literals are guaranteed to be distinct from properties of Formula -// and the construction in `buildCNF`. -assert(L1 != NullLit && L1 != L2 && L1 != L3 && - (L2 != L3 || L2 == NullLit)); + void addClause(ArrayRef lits) { +assert(!lits.empty()); +assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); const ClauseID C = ClauseStarts.size(); const size_t S = Clauses.size(); ClauseStarts.push_back(S); - -Clauses.push_back(L1); -if (L2 != NullLit) - Clauses.push_back(L2); -if (L3 != NullLit) - Clauses.push_back(L3); +Clauses.insert(Clauses.end(), lits.begin(), lits.end()); // Designate the first literal as the "watched" literal of the clause. -NextWatched.push_back(WatchedHead[L1]); -WatchedHead[L1] = C; +NextWatched.push_back(WatchedHead[lits.front()]); +WatchedHead[lits.front()] = C; } /// Returns the number of
[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
burakemir added inline comments. Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:184 +/// may in turn yield more unit clauses or even a contradiction. +/// The complexity of this preprocessing is O(log(K)) where K is the number +/// of unit clauses. Assuming K << N, it is negligible. This method sammccall wrote: > I'm confused about this comment: the preprocessing looks like O(N): we > process every clause once, and addClauseLiterals() runs in constant time. > What am I missing? > I am sorry for the misleading text: I only talked about addClause complexity. And I estimated the lookup at a conservative O(log(K)) worst case complexity. And this is just completely wrong, since we can expect lookup to be on a hash-table. It would be O(1) average and worst case O(K). I guess my mind was wandering off thinking about improving worst case lookup complexity. Changed to O(N). Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:202 + /// All literals in the input that are not `NullLit` must be distinct. + void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { +// The literals are guaranteed to be distinct from properties of BoolValue sammccall wrote: > if you're going to form an ArrayRef in any case, might as well skip this > indirection and have the callsites pass `addClause({L1, L2})` or so? Thanks, this looks much nicer. I chose to preserve the assert condition to check that there are <= 3 literals. I believe the solver way work with clauses that have more literals but I don't know whether any trade-offs were made to focus on 3SAT in the solver. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
burakemir updated this revision to Diff 555363. burakemir marked 4 inline comments as done. burakemir added a comment. Applied reviewer comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 Files: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp clang/unittests/Analysis/FlowSensitive/SolverTest.cpp Index: clang/unittests/Analysis/FlowSensitive/SolverTest.cpp === --- clang/unittests/Analysis/FlowSensitive/SolverTest.cpp +++ clang/unittests/Analysis/FlowSensitive/SolverTest.cpp @@ -369,4 +369,32 @@ EXPECT_TRUE(solver.reachedLimit()); } +TEST(SolverTest, SimpleButLargeContradiction) { + // This test ensures that the solver takes a short-cut on known + // contradictory inputs, without using max_iterations. At the time + // this test is added, formulas that are easily recognized to be + // contradictory at CNF construction time would lead to timeout. + WatchedLiteralsSolver solver(10); + ConstraintContext Ctx; + auto first = Ctx.atom(); + auto last = first; + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.atom()); + } + last = Ctx.conj(Ctx.neg(first), last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); + + first = Ctx.atom(); + last = Ctx.neg(first); + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.neg(Ctx.atom())); + } + last = Ctx.conj(first, last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); +} + } // namespace Index: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp === --- clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp +++ clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -12,8 +12,8 @@ //===--===// #include +#include #include -#include #include #include @@ -23,8 +23,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" + namespace clang { namespace dataflow { @@ -62,6 +64,10 @@ /// Returns the positive literal `V`. static constexpr Literal posLit(Variable V) { return 2 * V; } +static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); } + +static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); } + /// Returns the negative literal `!V`. static constexpr Literal negLit(Variable V) { return 2 * V + 1; } @@ -125,9 +131,14 @@ /// formula. llvm::DenseMap Atomics; + /// Indicates that we already know the formula is unsatisfiable. + /// During construction, we catch simple cases of conflicting unit-clauses. + bool KnownContradictory; + explicit CNFFormula(Variable LargestVar, llvm::DenseMap Atomics) - : LargestVar(LargestVar), Atomics(std::move(Atomics)) { + : LargestVar(LargestVar), Atomics(std::move(Atomics)), +KnownContradictory(false) { Clauses.push_back(0); ClauseStarts.push_back(0); NextWatched.push_back(0); @@ -135,33 +146,24 @@ WatchedHead.resize(NumLiterals + 1, 0); } - /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are - /// `NullLit` they are respectively omitted from the clause. - /// + /// Adds the `L1 v ... v Ln` clause to the formula. /// Requirements: /// - /// `L1` must not be `NullLit`. + /// `Li` must not be `NullLit`. /// /// All literals in the input that are not `NullLit` must be distinct. - void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { -// The literals are guaranteed to be distinct from properties of Formula -// and the construction in `buildCNF`. -assert(L1 != NullLit && L1 != L2 && L1 != L3 && - (L2 != L3 || L2 == NullLit)); + void addClause(ArrayRef lits) { +assert(!lits.empty()); +assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); const ClauseID C = ClauseStarts.size(); const size_t S = Clauses.size(); ClauseStarts.push_back(S); - -Clauses.push_back(L1); -if (L2 != NullLit) - Clauses.push_back(L2); -if (L3 != NullLit) - Clauses.push_back(L3); +Clauses.insert(Clauses.end(), lits.begin(), lits.end()); // Designate the first literal as the "watched" literal of the clause. -NextWatched.push_back(WatchedHead[L1]); -WatchedHead[L1] = C; +NextWatched.push_back(WatchedHead[lits.front()]); +WatchedHead[lits.front()] = C; } /// Returns the number of literals in clause `C`. @@ -176,6 +178,84 @@ } }; +/// Applies simplifications while building up a BooleanFormula. +/// We keep track of
[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
sammccall accepted this revision. sammccall added inline comments. This revision is now accepted and ready to land. Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:153 /// All literals in the input that are not `NullLit` must be distinct. - void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { -// The literals are guaranteed to be distinct from properties of Formula -// and the construction in `buildCNF`. -assert(L1 != NullLit && L1 != L2 && L1 != L3 && - (L2 != L3 || L2 == NullLit)); + void addClause(ArrayRef lits) { +assert(!lits.empty()); lits => Lits Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:184 +/// may in turn yield more unit clauses or even a contradiction. +/// The complexity of this preprocessing is O(log(K)) where K is the number +/// of unit clauses. Assuming K << N, it is negligible. This method I'm confused about this comment: the preprocessing looks like O(N): we process every clause once, and addClauseLiterals() runs in constant time. What am I missing? Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:202 + /// All literals in the input that are not `NullLit` must be distinct. + void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { +// The literals are guaranteed to be distinct from properties of BoolValue if you're going to form an ArrayRef in any case, might as well skip this indirection and have the callsites pass `addClause({L1, L2})` or so? Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:221 +llvm::SmallVector Simplified; +for (auto literal : Literals) { + auto v = var(literal); literal => L or so Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:259 + /// In this case then the formula is already known to be unsatisfiable. + bool IsKnownContradictory() { return Formula.KnownContradictory; } + IsKnownContradictory => isKnownContradictory Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
burakemir added a comment. Thanks for the review. PTAL. Comment at: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp:426 + // considered for the simplification of earlier clauses. Do a final + // pass to find more opportunities for simplification. + CNFFormula FinalCNF(NextVar - 1, std::move(CNF.Atomics)); sammccall wrote: > the issue is that info only propagates forward (earlier to later clauses, > right?) > > so by running this again, and sorting units first, we allow simplifications > that propagate info backwards *once*, but we still don't have all > simplifications. > > ``` > D > Av!B > Bv!C > Cv!D > > // first simplification pass > > Av!B > Bv!C > C // hoist new unit > > // second simplification pass > > Av!B > B // hoist new unit > > // third simplification pass > A > ``` > > I think this is worth being explicit about: we're going to find some more > simplifications, but we won't find them all, because running this to fixed > point is too inefficient. > > Is 2 experimentally determined to be the right number of passes? a guess? or > am I misunderstanding :-) You are right that one could do more work but it is better to leave this to the solver algorithm. We know empirically that there will be a few unit clauses, so might as well spend *linear time* (in number of unit clauses) to save some work. This won't be enough to determine whether all formulas are satisfiable, but it catches a few obvious contradictions. Doing this twice (as opposed to once) catches more formulas that are obvious contradictions in our unit tests and some real sources. I picked two simply because when we obtain unit clauses "later", we had no opportunity to apply them to earlier clauses. Doing full-blow mutations seems more complicated, esp. given that the Clauses data structure has been written for the actual solver algorithm. I think your concern on optimizing for a certain pattern of input formulas, which may well change in the future, is valid; therefore one should leave the "real" solving work to the solver algorithm, which systematically explores all cases. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D158407: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
burakemir updated this revision to Diff 554040. burakemir marked 2 inline comments as done. burakemir added a comment. Addressing reviewer comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D158407/new/ https://reviews.llvm.org/D158407 Files: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp clang/unittests/Analysis/FlowSensitive/SolverTest.cpp Index: clang/unittests/Analysis/FlowSensitive/SolverTest.cpp === --- clang/unittests/Analysis/FlowSensitive/SolverTest.cpp +++ clang/unittests/Analysis/FlowSensitive/SolverTest.cpp @@ -369,4 +369,32 @@ EXPECT_TRUE(solver.reachedLimit()); } +TEST(SolverTest, SimpleButLargeContradiction) { + // This test ensures that the solver takes a short-cut on known + // contradictory inputs, without using max_iterations. At the time + // this test is added, formulas that are easily recognized to be + // contradictory at CNF construction time would lead to timeout. + WatchedLiteralsSolver solver(10); + ConstraintContext Ctx; + auto first = Ctx.atom(); + auto last = first; + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.atom()); + } + last = Ctx.conj(Ctx.neg(first), last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); + + first = Ctx.atom(); + last = Ctx.neg(first); + for (int i = 1; i < 1; ++i) { +last = Ctx.conj(last, Ctx.neg(Ctx.atom())); + } + last = Ctx.conj(first, last); + ASSERT_EQ(solver.solve({last}).getStatus(), +Solver::Result::Status::Unsatisfiable); + EXPECT_FALSE(solver.reachedLimit()); +} + } // namespace Index: clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp === --- clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp +++ clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp @@ -12,8 +12,8 @@ //===--===// #include +#include #include -#include #include #include @@ -62,6 +62,10 @@ /// Returns the positive literal `V`. static constexpr Literal posLit(Variable V) { return 2 * V; } +static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); } + +static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); } + /// Returns the negative literal `!V`. static constexpr Literal negLit(Variable V) { return 2 * V + 1; } @@ -125,9 +129,14 @@ /// formula. llvm::DenseMap Atomics; + /// Indicates that we already know the formula is unsatisfiable. + /// During construction, we catch simple cases of conflicting unit-clauses. + bool KnownContradictory; + explicit CNFFormula(Variable LargestVar, llvm::DenseMap Atomics) - : LargestVar(LargestVar), Atomics(std::move(Atomics)) { + : LargestVar(LargestVar), Atomics(std::move(Atomics)), +KnownContradictory(false) { Clauses.push_back(0); ClauseStarts.push_back(0); NextWatched.push_back(0); @@ -135,33 +144,24 @@ WatchedHead.resize(NumLiterals + 1, 0); } - /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are - /// `NullLit` they are respectively omitted from the clause. - /// + /// Adds the `L1 v ... v Ln` clause to the formula. /// Requirements: /// - /// `L1` must not be `NullLit`. + /// `Li` must not be `NullLit`. /// /// All literals in the input that are not `NullLit` must be distinct. - void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) { -// The literals are guaranteed to be distinct from properties of Formula -// and the construction in `buildCNF`. -assert(L1 != NullLit && L1 != L2 && L1 != L3 && - (L2 != L3 || L2 == NullLit)); + void addClause(ArrayRef lits) { +assert(!lits.empty()); +assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); const ClauseID C = ClauseStarts.size(); const size_t S = Clauses.size(); ClauseStarts.push_back(S); - -Clauses.push_back(L1); -if (L2 != NullLit) - Clauses.push_back(L2); -if (L3 != NullLit) - Clauses.push_back(L3); +Clauses.insert(Clauses.end(), lits.begin(), lits.end()); // Designate the first literal as the "watched" literal of the clause. -NextWatched.push_back(WatchedHead[L1]); -WatchedHead[L1] = C; +NextWatched.push_back(WatchedHead[lits.front()]); +WatchedHead[lits.front()] = C; } /// Returns the number of literals in clause `C`. @@ -176,6 +176,94 @@ } }; +/// Applies simplifications while building up a BooleanFormula. +/// We keep track of unit clauses, which tell us variables that must be +/// true/false in any model that satisfies the overall formula. +/// Such variables can be dropped from subsequently-added clauses, which +/// may in turn yield more unit