[PATCH] D154948: [dataflow] improve determinism of generated SAT system

2023-07-12 Thread Sam McCall via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7d935d083659: [dataflow] improve determinism of generated 
SAT system (authored by sammccall).

Changed prior to commit:
  https://reviews.llvm.org/D154948?vs=539012=539393#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154948/new/

https://reviews.llvm.org/D154948

Files:
  clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
  clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
  clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
  clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
  clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp

Index: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
===
--- /dev/null
+++ clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
@@ -0,0 +1,137 @@
+//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "TestingSupport.h"
+#include "clang/AST/Decl.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Testing/TestAST.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include 
+#include 
+
+namespace clang::dataflow {
+
+// Run a no-op analysis, and return a textual representation of the
+// flow-condition at function exit.
+std::string analyzeAndPrintExitCondition(llvm::StringRef Code) {
+  DataflowAnalysisContext DACtx(std::make_unique());
+  clang::TestAST AST(Code);
+  const auto *Target =
+  cast(test::findValueDecl(AST.context(), "target"));
+  Environment InitEnv(DACtx, *Target);
+  auto CFCtx = cantFail(ControlFlowContext::build(*Target));
+
+  NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{});
+
+  auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
+  EXPECT_FALSE(!Result) << Result.takeError();
+
+  Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()]
+ ->Env.getFlowConditionToken();
+  std::string Textual;
+  llvm::raw_string_ostream OS(Textual);
+  DACtx.dumpFlowCondition(FinalFC, OS);
+  return Textual;
+}
+
+TEST(DeterminismTest, NestedSwitch) {
+  // Example extracted from real-world code that had wildly nondeterministic
+  // analysis times.
+  // Its flow condition depends on the order we join predecessor blocks.
+  const char *Code = R"cpp(
+struct Tree;
+struct Rep {
+  Tree *tree();
+  int length;
+};
+struct Tree {
+  int height();
+  Rep *edge(int);
+  int length;
+};
+struct RetVal {};
+int getInt();
+bool maybe();
+
+RetVal make(int size);
+inline RetVal target(int size, Tree& self) {
+  Tree* tree = 
+  const int height = self.height();
+  Tree* n1 = tree;
+  Tree* n2 = tree;
+  switch (height) {
+case 3:
+  tree = tree->edge(0)->tree();
+  if (maybe()) return {};
+  n2 = tree;
+case 2:
+  tree = tree->edge(0)->tree();
+  n1 = tree;
+  if (maybe()) return {};
+case 1:
+  tree = tree->edge(0)->tree();
+  if (maybe()) return {};
+case 0:
+  Rep* edge = tree->edge(0);
+  if (maybe()) return {};
+  int avail = getInt();
+  if (avail == 0) return {};
+  int delta = getInt();
+  RetVal span = {};
+  edge->length += delta;
+  switch (height) {
+case 3:
+  n1->length += delta;
+case 2:
+  n1->length += delta;
+case 1:
+  n1->length += delta;
+case 0:
+  n1->length += delta;
+  return span;
+  }
+  break;
+  }
+  return make(size);
+}
+  )cpp";
+
+  std::string Cond = analyzeAndPrintExitCondition(Code);
+  for (unsigned I = 0; I < 10; ++I)
+EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
+}
+
+TEST(DeterminismTest, ValueMergeOrder) {
+  // Artificial example whose final flow condition variable numbering depends
+  // on the order in which we merge 

[PATCH] D154948: [dataflow] improve determinism of generated SAT system

2023-07-11 Thread Yitzhak Mandelbaum via Phabricator via cfe-commits
ymandel accepted this revision.
ymandel added a comment.

Thank you, Sam!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154948/new/

https://reviews.llvm.org/D154948

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154948: [dataflow] improve determinism of generated SAT system

2023-07-11 Thread Dmitri Gribenko via Phabricator via cfe-commits
gribozavr2 accepted this revision.
gribozavr2 added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp:62
+};
+struct Tree{
+  int height();




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154948/new/

https://reviews.llvm.org/D154948

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154948: [dataflow] improve determinism of generated SAT system

2023-07-11 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
sammccall added reviewers: ymandel, xazax.hun.
Herald added subscribers: ChuanqiXu, martong, mgrang.
Herald added a reviewer: NoQ.
Herald added a project: All.
sammccall requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Fixes two places where we relied on map iteration order when processing
values, which leaked nondeterminism into the generated SAT formulas.
Adds a couple of tests that directly assert that the SAT system is
equivalent on each run.

It's desirable that the formulas are deterministic based on the input:

- our SAT solver is naive and perfermance is sensitive to even simple 
semantics-preserving transformations like A|B to B|A. (e.g. it's likely to 
choose a different variable to split on). Timeout failures are bad, but *flaky* 
ones are terrible to debug.
- similarly when debugging, it's important to have a consistent understanding 
of what e.g. "V23" means across runs.

---

Both changes in this patch were isolated from a nullability analysis of
real-world code which was extremely slow, spending ages in the SAT
solver at "random" points that varied on each run.
I've included a reduced version of the code as a regression test.

One of the changes shows up directly as flow-condition nondeterminism
with a no-op analysis, the other relied on bits of the nullability
analysis but I found a synthetic example to show the problem.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154948

Files:
  clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
  clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
  clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
  clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
  clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp

Index: clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
===
--- /dev/null
+++ clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
@@ -0,0 +1,137 @@
+//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "TestingSupport.h"
+#include "clang/AST/Decl.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Testing/TestAST.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include 
+#include 
+
+namespace clang::dataflow {
+
+// Run a no-op analysis, and return a textual representation of the
+// flow-condition at function exit.
+std::string analyzeAndPrintExitCondition(llvm::StringRef Code) {
+  DataflowAnalysisContext DACtx(std::make_unique());
+  clang::TestAST AST(Code);
+  const auto *Target =
+  cast(test::findValueDecl(AST.context(), "target"));
+  Environment InitEnv(DACtx, *Target);
+  auto CFCtx = cantFail(ControlFlowContext::build(*Target));
+
+  NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{});
+
+  auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
+  EXPECT_FALSE(!Result) << Result.takeError();
+
+  Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()]
+ ->Env.getFlowConditionToken();
+  std::string Textual;
+  llvm::raw_string_ostream OS(Textual);
+  DACtx.dumpFlowCondition(FinalFC, OS);
+  return Textual;
+}
+
+TEST(DeterminismTest, NestedSwitch) {
+  // Example extracted from real-world code that had wildly nondeterministic
+  // analysis times.
+  // Its flow condition depends on the order we join predecessor blocks.
+  const char *Code = R"cpp(
+struct Tree;
+struct Rep {
+  Tree *tree();
+  int length;
+};
+struct Tree{
+  int height();
+  Rep *edge(int);
+  int length;
+};
+struct RetVal {};
+int getInt();
+bool maybe();
+
+RetVal make(int size);
+inline RetVal target(int size, Tree& self) {
+  Tree* tree = 
+  const int height = self.height();
+  Tree* n1 = tree;
+  Tree* n2 = tree;
+  switch (height) {
+case 3:
+  tree = tree->edge(0)->tree();
+  if (maybe()) return {};
+  n2 = tree;
+case 2:
+  tree = tree->edge(0)->tree();
+  n1 = tree;
+  if