This revision was automatically updated to reflect the committed changes.
Closed by commit rG806329da0700: [analyzer][solver] Iterate to a fixpoint 
during symbol simplification with… (authored by martong).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106823/new/

https://reviews.llvm.org/D106823

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/expr-inspection-printState-eq-classes.c
  clang/test/Analysis/symbol-simplification-disequality-info.cpp
  
clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp
  clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
  clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
  clang/test/Analysis/symbol-simplification-reassume.cpp

Index: clang/test/Analysis/symbol-simplification-reassume.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-simplification-reassume.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false \
+// RUN:   -verify
+
+// In this test we check whether the solver's symbol simplification mechanism
+// is capable of re-assuming simiplified symbols.
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_warnIfReached();
+
+void test_reassume_false_range(int x, int y) {
+  if (x + y != 0) // x + y == 0
+    return;
+  if (x != 1)     // x == 1
+    return;
+  clang_analyzer_eval(y == -1); // expected-warning{{TRUE}}
+}
+
+void test_reassume_true_range(int x, int y) {
+  if (x + y != 1) // x + y == 1
+    return;
+  if (x != 1)     // x == 1
+    return;
+  clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+}
+
+void test_reassume_inclusive_range(int x, int y) {
+  if (x + y < 0 || x + y > 1) // x + y: [0, 1]
+    return;
+  if (x != 1)                 // x == 1
+    return;
+                              // y: [-1, 0]
+  clang_analyzer_eval(y > 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval(y < -1);// expected-warning{{FALSE}}
+}
Index: clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   2>&1 | FileCheck %s
+
+// In this test we check whether the solver's symbol simplification mechanism
+// is capable of reaching a fixpoint. This should be done after TWO iterations.
+
+void clang_analyzer_printState();
+
+void test(int a, int b, int c, int d) {
+  if (a + b + c != d)
+    return;
+  if (c + b != 0)
+    return;
+  clang_analyzer_printState();
+  // CHECK:      "constraints": [
+  // CHECK-NEXT:   { "symbol": "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
+  // CHECK-NEXT:   { "symbol": "(reg_$2<int c>) + (reg_$1<int b>)", "range": "{ [0, 0] }" }
+  // CHECK-NEXT: ],
+  // CHECK-NEXT: "equivalence_classes": [
+  // CHECK-NEXT:   [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "reg_$3<int d>" ]
+  // CHECK-NEXT: ],
+  // CHECK-NEXT: "disequality_info": null,
+
+  // Simplification starts here.
+  if (b != 0)
+    return;
+  clang_analyzer_printState();
+  // CHECK:       "constraints": [
+  // CHECK-NEXT:    { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
+  // CHECK-NEXT:    { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" },
+  // CHECK-NEXT:    { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" }
+  // CHECK-NEXT:  ],
+  // CHECK-NEXT:  "equivalence_classes": [
+  // CHECK-NEXT:    [ "(reg_$0<int a>) != (reg_$3<int d>)" ],
+  // CHECK-NEXT:    [ "reg_$0<int a>", "reg_$3<int d>" ],
+  // CHECK-NEXT:    [ "reg_$2<int c>" ]
+  // CHECK-NEXT:  ],
+  // CHECK-NEXT:  "disequality_info": null,
+
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c * d);
+  return;
+}
Index: clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   2>&1 | FileCheck %s
+
+// In this test we check whether the solver's symbol simplification mechanism
+// is capable of reaching a fixpoint. This should be done after one iteration.
+
+void clang_analyzer_printState();
+
+void test(int a, int b, int c) {
+  if (a + b != c)
+    return;
+  clang_analyzer_printState();
+  // CHECK:      "constraints": [
+  // CHECK-NEXT:   { "symbol": "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "range": "{ [0, 0] }" }
+  // CHECK-NEXT: ],
+  // CHECK-NEXT: "equivalence_classes": [
+  // CHECK-NEXT:   [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$2<int c>" ]
+  // CHECK-NEXT: ],
+  // CHECK-NEXT: "disequality_info": null,
+
+  // Simplification starts here.
+  if (b != 0)
+    return;
+  clang_analyzer_printState();
+  // CHECK:        "constraints": [
+  // CHECK-NEXT:     { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
+  // CHECK-NEXT:     { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }
+  // CHECK-NEXT:   ],
+  // CHECK-NEXT:   "equivalence_classes": [
+  // CHECK-NEXT:     [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
+  // CHECK-NEXT:     [ "reg_$0<int a>", "reg_$2<int c>" ]
+  // CHECK-NEXT:   ],
+  // CHECK-NEXT: "disequality_info": null,
+
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c);
+  return;
+}
Index: clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -verify
+
+// In this test we check whether the solver's symbol simplification mechanism
+// is capable of reaching a fixpoint.
+
+void clang_analyzer_warnIfReached();
+
+void test_contradiction(int a, int b, int c, int d, int x) {
+  if (a + b + c != d)
+    return;
+  if (a == d)
+    return;
+  if (b + c != 0)
+    return;
+  clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+
+  // Bring in the contradiction.
+  if (b != 0)
+    return;
+
+  // After the simplification of `b == 0` we have:
+  //   b == 0
+  //   a + c == d
+  //   a != d
+  //   c == 0
+  // Doing another iteration we reach the fixpoint (with a contradiction):
+  //   b == 0
+  //   a == d
+  //   a != d
+  //   c == 0
+  clang_analyzer_warnIfReached(); // no-warning, i.e. UNREACHABLE
+
+  // Enabling expensive checks would trigger an assertion failure here without
+  // the fixpoint iteration.
+  if (a + c == x)
+    return;
+
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c * d * x);
+  return;
+}
+
+void test_true_range_contradiction(int a, unsigned b) {
+  if (!(b > a))   // unsigned b > int a
+    return;
+  if (a != -1)    // int a == -1
+    return;       // Starts a simplification of `unsigned b > int a`,
+                  // that results in `unsigned b > UINT_MAX`,
+                  // which is always false, so the State is infeasible.
+  clang_analyzer_warnIfReached(); // no-warning
+  (void)(a * b);
+}
Index: clang/test/Analysis/symbol-simplification-disequality-info.cpp
===================================================================
--- /dev/null
+++ clang/test/Analysis/symbol-simplification-disequality-info.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   2>&1 | FileCheck %s
+
+// In this test we check how the solver's symbol simplification mechanism
+// simplifies the disequality info.
+
+void clang_analyzer_printState();
+
+void test(int a, int b, int c, int d) {
+  if (a + b + c == d)
+    return;
+  clang_analyzer_printState();
+  // CHECK:       "disequality_info": [
+  // CHECK-NEXT:    {
+  // CHECK-NEXT:      "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ],
+  // CHECK-NEXT:      "disequal_to": [
+  // CHECK-NEXT:        [ "reg_$3<int d>" ]]
+  // CHECK-NEXT:    },
+  // CHECK-NEXT:    {
+  // CHECK-NEXT:      "class": [ "reg_$3<int d>" ],
+  // CHECK-NEXT:      "disequal_to": [
+  // CHECK-NEXT:        [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]]
+  // CHECK-NEXT:    }
+  // CHECK-NEXT:  ],
+
+
+  // Simplification starts here.
+  if (b != 0)
+    return;
+  clang_analyzer_printState();
+  // CHECK:      "disequality_info": [
+  // CHECK-NEXT:   {
+  // CHECK-NEXT:     "class": [ "(reg_$0<int a>) + (reg_$2<int c>)" ],
+  // CHECK-NEXT:     "disequal_to": [
+  // CHECK-NEXT:       [ "reg_$3<int d>" ]]
+  // CHECK-NEXT:   },
+  // CHECK-NEXT:   {
+  // CHECK-NEXT:     "class": [ "reg_$3<int d>" ],
+  // CHECK-NEXT:     "disequal_to": [
+  // CHECK-NEXT:        [ "(reg_$0<int a>) + (reg_$2<int c>)" ]]
+  // CHECK-NEXT:    }
+  // CHECK-NEXT:  ],
+
+  if (c != 0)
+    return;
+  clang_analyzer_printState();
+  // CHECK:       "disequality_info": [
+  // CHECK-NEXT:    {
+  // CHECK-NEXT:      "class": [ "reg_$0<int a>" ],
+  // CHECK-NEXT:      "disequal_to": [
+  // CHECK-NEXT:        [ "reg_$3<int d>" ]]
+  // CHECK-NEXT:    },
+  // CHECK-NEXT:    {
+  // CHECK-NEXT:      "class": [ "reg_$3<int d>" ],
+  // CHECK-NEXT:      "disequal_to": [
+  // CHECK-NEXT:        [ "reg_$0<int a>" ]]
+  // CHECK-NEXT:    }
+  // CHECK-NEXT:  ],
+
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c * d);
+  return;
+}
Index: clang/test/Analysis/expr-inspection-printState-eq-classes.c
===================================================================
--- clang/test/Analysis/expr-inspection-printState-eq-classes.c
+++ clang/test/Analysis/expr-inspection-printState-eq-classes.c
@@ -15,7 +15,7 @@
   return;
 }
 
-  // CHECK:      "equivalence_classes": [
-  // CHECK-NEXT:   [ "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "(reg_$0<int a>) != (reg_$2<int c>)" ],
-  // CHECK-NEXT:   [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
-  // CHECK-NEXT: ],
+// CHECK:      "equivalence_classes": [
+// CHECK-NEXT:     [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
+// CHECK-NEXT:     [ "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
+// CHECK-NEXT: ],
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -601,6 +601,10 @@
   LLVM_NODISCARD static inline Optional<bool>
   areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
 
+  /// Remove one member from the class.
+  LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
+                                              const SymbolRef Old);
+
   /// Iterate over all symbols and try to simplify them.
   LLVM_NODISCARD static inline ProgramStateRef
   simplify(SValBuilder &SVB, RangeSet::Factory &F, RangedConstraintManager &RCM,
@@ -656,6 +660,7 @@
   inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
                                    SymbolSet Members, EquivalenceClass Other,
                                    SymbolSet OtherMembers);
+
   static inline bool
   addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
                        RangeSet::Factory &F, ProgramStateRef State,
@@ -1749,6 +1754,19 @@
       return false;
   }
 
+  // We may have trivial equivalence classes in the disequality info as
+  // well, and we need to simplify them.
+  DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
+  for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
+       DisequalityInfo) {
+    EquivalenceClass Class = DisequalityEntry.first;
+    ClassSet DisequalClasses = DisequalityEntry.second;
+    State =
+        EquivalenceClass::simplify(Builder, RangeFactory, RCM, State, Class);
+    if (!State)
+      return false;
+  }
+
   return true;
 }
 
@@ -2122,6 +2140,61 @@
   return llvm::None;
 }
 
+LLVM_NODISCARD ProgramStateRef
+EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
+
+  SymbolSet ClsMembers = getClassMembers(State);
+  assert(ClsMembers.contains(Old));
+
+  // We don't remove `Old`'s Sym->Class relation for two reasons:
+  // 1) This way constraints for the old symbol can still be found via it's
+  // equivalence class that it used to be the member of.
+  // 2) Performance and resource reasons. We can spare one removal and thus one
+  // additional tree in the forest of `ClassMap`.
+
+  // Remove `Old`'s Class->Sym relation.
+  SymbolSet::Factory &F = getMembersFactory(State);
+  ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
+  ClsMembers = F.remove(ClsMembers, Old);
+  // Ensure another precondition of the removeMember function (we can check
+  // this only with isEmpty, thus we have to do the remove first).
+  assert(!ClsMembers.isEmpty() &&
+         "Class should have had at least two members before member removal");
+  // Overwrite the existing members assigned to this class.
+  ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
+  ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
+  State = State->set<ClassMembers>(ClassMembersMap);
+
+  return State;
+}
+
+// Re-evaluate an SVal with top-level `State->assume` logic.
+LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
+                                        const RangeSet *Constraint,
+                                        SVal TheValue) {
+  if (!Constraint)
+    return State;
+
+  const auto DefinedVal = TheValue.castAs<DefinedSVal>();
+
+  // If the SVal is 0, we can simply interpret that as `false`.
+  if (Constraint->encodesFalseRange())
+    return State->assume(DefinedVal, false);
+
+  // If the constraint does not encode 0 then we can interpret that as `true`
+  // AND as a Range(Set).
+  if (Constraint->encodesTrueRange()) {
+    State = State->assume(DefinedVal, true);
+    if (!State)
+      return nullptr;
+    // Fall through, re-assume based on the range values as well.
+  }
+  // Overestimate the individual Ranges with the RangeSet' lowest and
+  // highest values.
+  return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
+                                     Constraint->getMaxValue(), true);
+}
+
 // Iterate over all symbols and try to simplify them. Once a symbol is
 // simplified then we check if we can merge the simplified symbol's equivalence
 // class to this class. This way, we simplify not just the symbols but the
@@ -2158,22 +2231,36 @@
       if (OldState == State)
         continue;
 
-      // Initiate the reorganization of the equality information. E.g., if we
-      // have `c + 1 == 0` then we'd like to express that `c == -1`. It makes
-      // sense to do this only with `SymIntExpr`s.
-      // TODO Handle `IntSymExpr` as well, once computeAdjustment can handle
-      // them.
-      if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SimplifiedMemberSym)) {
-        if (const RangeSet *ClassConstraint = getConstraint(State, Class)) {
-          // Overestimate the individual Ranges with the RangeSet' lowest and
-          // highest values.
-          State = RCM.assumeSymInclusiveRange(
-              State, SIE, ClassConstraint->getMinValue(),
-              ClassConstraint->getMaxValue(), /*InRange=*/true);
-          if (!State)
-            return nullptr;
-        }
-      }
+      assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
+      // Remove the old and more complex symbol.
+      State = find(State, MemberSym).removeMember(State, MemberSym);
+
+      // Query the class constraint again b/c that may have changed during the
+      // merge above.
+      const RangeSet *ClassConstraint = getConstraint(State, Class);
+
+      // Re-evaluate an SVal with top-level `State->assume`, this ignites
+      // a RECURSIVE algorithm that will reach a FIXPOINT.
+      //
+      // About performance and complexity: Let us assume that in a State we
+      // have N non-trivial equivalence classes and that all constraints and
+      // disequality info is related to non-trivial classes. In the worst case,
+      // we can simplify only one symbol of one class in each iteration. The
+      // number of symbols in one class cannot grow b/c we replace the old
+      // symbol with the simplified one. Also, the number of the equivalence
+      // classes can decrease only, b/c the algorithm does a merge operation
+      // optionally. We need N iterations in this case to reach the fixpoint.
+      // Thus, the steps needed to be done in the worst case is proportional to
+      // N*N.
+      //
+      // This worst case scenario can be extended to that case when we have
+      // trivial classes in the constraints and in the disequality map. This
+      // case can be reduced to the case with a State where there are only
+      // non-trivial classes. This is because a merge operation on two trivial
+      // classes results in one non-trivial class.
+      State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
+      if (!State)
+        return nullptr;
     }
   }
   return State;
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -287,6 +287,20 @@
     return contains(T.getZeroValue());
   }
 
+  /// Test if the range is the [0,0] range.
+  ///
+  /// Complexity: O(1)
+  bool encodesFalseRange() const {
+    const llvm::APSInt *Constant = getConcreteValue();
+    return Constant && Constant->isZero();
+  }
+
+  /// Test if the range doesn't contain zero.
+  ///
+  /// Complexity: O(logN)
+  ///             where N = size(this)
+  bool encodesTrueRange() const { return !containsZero(); }
+
   void dump(raw_ostream &OS) const;
   void dump() const;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to