[PATCH] D154948: [dataflow] improve determinism of generated SAT system
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG7d935d083659: [dataflow] improve determinism of generated SAT system (authored by sammccall). Changed prior to commit: https://reviews.llvm.org/D154948?vs=539012=539393#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154948/new/ https://reviews.llvm.org/D154948 Files: clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp clang/unittests/Analysis/FlowSensitive/CMakeLists.txt clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp Index: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp === --- /dev/null +++ clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp @@ -0,0 +1,137 @@ +//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "TestingSupport.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "clang/Basic/LLVM.h" +#include "clang/Testing/TestAST.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include +#include + +namespace clang::dataflow { + +// Run a no-op analysis, and return a textual representation of the +// flow-condition at function exit. +std::string analyzeAndPrintExitCondition(llvm::StringRef Code) { + DataflowAnalysisContext DACtx(std::make_unique()); + clang::TestAST AST(Code); + const auto *Target = + cast(test::findValueDecl(AST.context(), "target")); + Environment InitEnv(DACtx, *Target); + auto CFCtx = cantFail(ControlFlowContext::build(*Target)); + + NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{}); + + auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv); + EXPECT_FALSE(!Result) << Result.takeError(); + + Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()] + ->Env.getFlowConditionToken(); + std::string Textual; + llvm::raw_string_ostream OS(Textual); + DACtx.dumpFlowCondition(FinalFC, OS); + return Textual; +} + +TEST(DeterminismTest, NestedSwitch) { + // Example extracted from real-world code that had wildly nondeterministic + // analysis times. + // Its flow condition depends on the order we join predecessor blocks. + const char *Code = R"cpp( +struct Tree; +struct Rep { + Tree *tree(); + int length; +}; +struct Tree { + int height(); + Rep *edge(int); + int length; +}; +struct RetVal {}; +int getInt(); +bool maybe(); + +RetVal make(int size); +inline RetVal target(int size, Tree& self) { + Tree* tree = + const int height = self.height(); + Tree* n1 = tree; + Tree* n2 = tree; + switch (height) { +case 3: + tree = tree->edge(0)->tree(); + if (maybe()) return {}; + n2 = tree; +case 2: + tree = tree->edge(0)->tree(); + n1 = tree; + if (maybe()) return {}; +case 1: + tree = tree->edge(0)->tree(); + if (maybe()) return {}; +case 0: + Rep* edge = tree->edge(0); + if (maybe()) return {}; + int avail = getInt(); + if (avail == 0) return {}; + int delta = getInt(); + RetVal span = {}; + edge->length += delta; + switch (height) { +case 3: + n1->length += delta; +case 2: + n1->length += delta; +case 1: + n1->length += delta; +case 0: + n1->length += delta; + return span; + } + break; + } + return make(size); +} + )cpp"; + + std::string Cond = analyzeAndPrintExitCondition(Code); + for (unsigned I = 0; I < 10; ++I) +EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code)); +} + +TEST(DeterminismTest, ValueMergeOrder) { + // Artificial example whose final flow condition variable numbering depends + // on the order in which we merge
[PATCH] D154948: [dataflow] improve determinism of generated SAT system
ymandel accepted this revision. ymandel added a comment. Thank you, Sam! Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154948/new/ https://reviews.llvm.org/D154948 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154948: [dataflow] improve determinism of generated SAT system
gribozavr2 accepted this revision. gribozavr2 added inline comments. This revision is now accepted and ready to land. Comment at: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp:62 +}; +struct Tree{ + int height(); Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154948/new/ https://reviews.llvm.org/D154948 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D154948: [dataflow] improve determinism of generated SAT system
sammccall created this revision. sammccall added reviewers: ymandel, xazax.hun. Herald added subscribers: ChuanqiXu, martong, mgrang. Herald added a reviewer: NoQ. Herald added a project: All. sammccall requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. Fixes two places where we relied on map iteration order when processing values, which leaked nondeterminism into the generated SAT formulas. Adds a couple of tests that directly assert that the SAT system is equivalent on each run. It's desirable that the formulas are deterministic based on the input: - our SAT solver is naive and perfermance is sensitive to even simple semantics-preserving transformations like A|B to B|A. (e.g. it's likely to choose a different variable to split on). Timeout failures are bad, but *flaky* ones are terrible to debug. - similarly when debugging, it's important to have a consistent understanding of what e.g. "V23" means across runs. --- Both changes in this patch were isolated from a nullability analysis of real-world code which was extremely slow, spending ages in the SAT solver at "random" points that varied on each run. I've included a reduced version of the code as a regression test. One of the changes shows up directly as flow-condition nondeterminism with a no-op analysis, the other relied on bits of the nullability analysis but I found a synthetic example to show the problem. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D154948 Files: clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp clang/unittests/Analysis/FlowSensitive/CMakeLists.txt clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp Index: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp === --- /dev/null +++ clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp @@ -0,0 +1,137 @@ +//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#include "TestingSupport.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/Formula.h" +#include "clang/Analysis/FlowSensitive/NoopAnalysis.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "clang/Basic/LLVM.h" +#include "clang/Testing/TestAST.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include +#include + +namespace clang::dataflow { + +// Run a no-op analysis, and return a textual representation of the +// flow-condition at function exit. +std::string analyzeAndPrintExitCondition(llvm::StringRef Code) { + DataflowAnalysisContext DACtx(std::make_unique()); + clang::TestAST AST(Code); + const auto *Target = + cast(test::findValueDecl(AST.context(), "target")); + Environment InitEnv(DACtx, *Target); + auto CFCtx = cantFail(ControlFlowContext::build(*Target)); + + NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{}); + + auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv); + EXPECT_FALSE(!Result) << Result.takeError(); + + Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()] + ->Env.getFlowConditionToken(); + std::string Textual; + llvm::raw_string_ostream OS(Textual); + DACtx.dumpFlowCondition(FinalFC, OS); + return Textual; +} + +TEST(DeterminismTest, NestedSwitch) { + // Example extracted from real-world code that had wildly nondeterministic + // analysis times. + // Its flow condition depends on the order we join predecessor blocks. + const char *Code = R"cpp( +struct Tree; +struct Rep { + Tree *tree(); + int length; +}; +struct Tree{ + int height(); + Rep *edge(int); + int length; +}; +struct RetVal {}; +int getInt(); +bool maybe(); + +RetVal make(int size); +inline RetVal target(int size, Tree& self) { + Tree* tree = + const int height = self.height(); + Tree* n1 = tree; + Tree* n2 = tree; + switch (height) { +case 3: + tree = tree->edge(0)->tree(); + if (maybe()) return {}; + n2 = tree; +case 2: + tree = tree->edge(0)->tree(); + n1 = tree; + if