[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-28 Thread Valeriy Savchenko via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1f57d76a8dd0: [analyzer] Refactor range inference for 
symbolic expressions (authored by vsavchenko).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c
  clang/test/Analysis/double-ranges-bug.c

Index: clang/test/Analysis/double-ranges-bug.c
===
--- /dev/null
+++ clang/test/Analysis/double-ranges-bug.c
@@ -0,0 +1,22 @@
+// RUN: %clang_analyze_cc1 -verify %s -analyzer-checker=core
+
+// expected-no-diagnostics
+
+typedef unsigned long int A;
+
+extern int fill(A **values, int *nvalues);
+
+void foo() {
+  A *values;
+  int nvalues;
+  fill(, );
+
+  int i = 1;
+  double x, y;
+
+  y = values[i - 1];
+  x = values[i];
+
+  if (x <= y) {
+  }
+}
Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,22 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
+  //   Resulting ranges for the following cases are infeasible.
+  //   This is what causes paradoxical results below.
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
+  if (a < 10) {
+clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
+clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
+  }
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@
 }
 
 bool RangeSet::pin(llvm::APSInt , llvm::APSInt ) const {
+  if (isEmpty()) {
+// This range is already infeasible.
+return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a different pinning operation.
@@ -283,6 +295,207 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is 

[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-27 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko updated this revision to Diff 266571.
vsavchenko added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c
  clang/test/Analysis/double-ranges-bug.c

Index: clang/test/Analysis/double-ranges-bug.c
===
--- /dev/null
+++ clang/test/Analysis/double-ranges-bug.c
@@ -0,0 +1,22 @@
+// RUN: %clang_analyze_cc1 -verify %s -analyzer-checker=core
+
+// expected-no-diagnostics
+
+typedef unsigned long int A;
+
+extern int fill(A **values, int *nvalues);
+
+void foo() {
+  A *values;
+  int nvalues;
+  fill(, );
+
+  int i = 1;
+  double x, y;
+
+  y = values[i - 1];
+  x = values[i];
+
+  if (x <= y) {
+  }
+}
Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,22 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
+  //   Resulting ranges for the following cases are infeasible.
+  //   This is what causes paradoxical results below.
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
+  if (a < 10) {
+clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
+clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
+  }
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@
 }
 
 bool RangeSet::pin(llvm::APSInt , llvm::APSInt ) const {
+  if (isEmpty()) {
+// This range is already infeasible.
+return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a different pinning operation.
@@ -283,6 +295,207 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is not supported for advanced inference.
+// In this case, we simply backoff to the default "let's simply
+  

[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-18 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko updated this revision to Diff 264614.
vsavchenko added a comment.

Fix a problem with doubles sneaking into integer symbolic expressions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c
  clang/test/Analysis/double-ranges-bug.c

Index: clang/test/Analysis/double-ranges-bug.c
===
--- /dev/null
+++ clang/test/Analysis/double-ranges-bug.c
@@ -0,0 +1,22 @@
+// RUN: %clang_analyze_cc1 -verify %s -analyzer-checker=core
+
+// expected-no-diagnostics
+
+typedef unsigned long int A;
+
+extern int fill(A **values, int *nvalues);
+
+void foo() {
+  A *values;
+  int nvalues;
+  fill(, );
+
+  int i = 1;
+  double x, y;
+
+  y = values[i - 1];
+  x = values[i];
+
+  if (x <= y) {
+  }
+}
Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,22 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
+  //   Resulting ranges for the following cases are infeasible.
+  //   This is what causes paradoxical results below.
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
+  if (a < 10) {
+clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
+clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
+  }
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@
 }
 
 bool RangeSet::pin(llvm::APSInt , llvm::APSInt ) const {
+  if (isEmpty()) {
+// This range is already infeasible.
+return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a different pinning operation.
@@ -238,6 +250,207 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is not supported for advanced inference.
+// 

[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-09 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ accepted this revision.
NoQ added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

vsavchenko wrote:
> NoQ wrote:
> > vsavchenko wrote:
> > > NoQ wrote:
> > > > vsavchenko wrote:
> > > > > NoQ wrote:
> > > > > > How can both of these be false? o.o
> > > > > Yeah :) I realized how weird it is.
> > > > > Anything is possible in the land of infeasible ranges.
> > > > > 
> > > > > I changed a comment there to address this
> > > > I mean, this pretty much never happened before. How are you not 
> > > > tripping on [[ 
> > > > https://github.com/llvm/llvm-project/blob/1a4421a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h#L100
> > > >  | this assert ]]? (probably it's simply been disabled in normal debug 
> > > > builds now that it's under "expensive checks")
> > > > 
> > > > The correct thing to do is to detect the paradox earlier and mark the 
> > > > path as infeasible. What prevents us from doing it right away here?
> > > Before we didn't really care about constraints on the operands and I 
> > > changed it :)
> > > So, now `Intersect` (which is logically not a correct way to do what is 
> > > meant) can cause this type of behaviour
> > [visible confusion]
> > 
> > Could you elaborate? I see that only constraint so far is `$a: [11; 
> > UINT_MAX]`. I don't see any infeasible ranges here. `(a & 1) <= 1` is 
> > clearly true. If we were previously thinking that it's unknown and now we 
> > think that it's false, then it's a regression.
> `a` is indeed `[11, UINT_MAX]`.
> Current implementation checks a constant (i.e. `1`) and intersects the range 
> for LHS `[11, UINT_MAX]` with `[UINT_MIN, 1]`, which produces empty range set 
> (aka infeasible).
> 
> This is why I'm saying that intersection is a bad choice, it's even plain 
> wrong.
> Before this patch we ignored constraints for `a` and considered it to be 
> `[UINT_MIN, UINT_MAX]`. In that setting, intersection does indeed work (which 
> doesn't make it correct).
> 
> Yes, it is a regression. I'm changing this implementation in the child 
> revisions.
> 
> 
> Yes, it is a regression. I'm changing this implementation in the child 
> revisions.

Oh, right, got it :D

Ok, let's land 'em together then!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-06 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko marked an inline comment as done.
vsavchenko added inline comments.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

NoQ wrote:
> vsavchenko wrote:
> > NoQ wrote:
> > > vsavchenko wrote:
> > > > NoQ wrote:
> > > > > How can both of these be false? o.o
> > > > Yeah :) I realized how weird it is.
> > > > Anything is possible in the land of infeasible ranges.
> > > > 
> > > > I changed a comment there to address this
> > > I mean, this pretty much never happened before. How are you not tripping 
> > > on [[ 
> > > https://github.com/llvm/llvm-project/blob/1a4421a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h#L100
> > >  | this assert ]]? (probably it's simply been disabled in normal debug 
> > > builds now that it's under "expensive checks")
> > > 
> > > The correct thing to do is to detect the paradox earlier and mark the 
> > > path as infeasible. What prevents us from doing it right away here?
> > Before we didn't really care about constraints on the operands and I 
> > changed it :)
> > So, now `Intersect` (which is logically not a correct way to do what is 
> > meant) can cause this type of behaviour
> [visible confusion]
> 
> Could you elaborate? I see that only constraint so far is `$a: [11; 
> UINT_MAX]`. I don't see any infeasible ranges here. `(a & 1) <= 1` is clearly 
> true. If we were previously thinking that it's unknown and now we think that 
> it's false, then it's a regression.
`a` is indeed `[11, UINT_MAX]`.
Current implementation checks a constant (i.e. `1`) and intersects the range 
for LHS `[11, UINT_MAX]` with `[UINT_MIN, 1]`, which produces empty range set 
(aka infeasible).

This is why I'm saying that intersection is a bad choice, it's even plain wrong.
Before this patch we ignored constraints for `a` and considered it to be 
`[UINT_MIN, UINT_MAX]`. In that setting, intersection does indeed work (which 
doesn't make it correct).

Yes, it is a regression. I'm changing this implementation in the child 
revisions.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-06 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:340-345
+// TODO #2: We didn't go into the nested expressions before, so it
+// might cause us spending much more time doing the inference.
+// This can be a problem for deeply nested expressions that are
+// involved in conditions and get tested continuously.  We definitely
+// need to address this issue and introduce some sort of caching
+// in here.

xazax.hun wrote:
> NoQ wrote:
> > vsavchenko wrote:
> > > NoQ wrote:
> > > > I think this is a must-have, at least in some form. We've been 
> > > > exploding like this before on real-world code, well, probably not with 
> > > > bitwise ops but i'm still worried.
> > > It will be pretty easy to introduce a limit on how deep we go into a tree 
> > > of the given symbolic expression. That can also be a solution.
> > I mean, doing something super trivial, like defining a map from symexprs to 
> > ranges in `SymbolicRangeInferrer` itself and find-or-inserting into it, 
> > will probably not be harder than counting depth(?)
> I am a bit ignorant of this topic, but I wonder what a good caching mechanism 
> would look like.
> A simple `symexpr -> range` mapping does not feel right as the same symexpr 
> might have a different range in a different program state (e.g., we might 
> learn new ranges for our symbols). But having a separate map for each state 
> state might do relatively little caching?
Even a simple `symexpr -> range` mapping with lifetime of 
`SymbolicRangeInferrer` should be able to improve algorithmic complexity 
dramatically. And it doesn't need to consider different states because it only 
lives for the duration of a single `assume()` operation.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

vsavchenko wrote:
> NoQ wrote:
> > vsavchenko wrote:
> > > NoQ wrote:
> > > > How can both of these be false? o.o
> > > Yeah :) I realized how weird it is.
> > > Anything is possible in the land of infeasible ranges.
> > > 
> > > I changed a comment there to address this
> > I mean, this pretty much never happened before. How are you not tripping on 
> > [[ 
> > https://github.com/llvm/llvm-project/blob/1a4421a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h#L100
> >  | this assert ]]? (probably it's simply been disabled in normal debug 
> > builds now that it's under "expensive checks")
> > 
> > The correct thing to do is to detect the paradox earlier and mark the path 
> > as infeasible. What prevents us from doing it right away here?
> Before we didn't really care about constraints on the operands and I changed 
> it :)
> So, now `Intersect` (which is logically not a correct way to do what is 
> meant) can cause this type of behaviour
[visible confusion]

Could you elaborate? I see that only constraint so far is `$a: [11; UINT_MAX]`. 
I don't see any infeasible ranges here. `(a & 1) <= 1` is clearly true. If we 
were previously thinking that it's unknown and now we think that it's false, 
then it's a regression.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-05 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko marked an inline comment as done.
vsavchenko added inline comments.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

NoQ wrote:
> vsavchenko wrote:
> > NoQ wrote:
> > > How can both of these be false? o.o
> > Yeah :) I realized how weird it is.
> > Anything is possible in the land of infeasible ranges.
> > 
> > I changed a comment there to address this
> I mean, this pretty much never happened before. How are you not tripping on 
> [[ 
> https://github.com/llvm/llvm-project/blob/1a4421a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h#L100
>  | this assert ]]? (probably it's simply been disabled in normal debug builds 
> now that it's under "expensive checks")
> 
> The correct thing to do is to detect the paradox earlier and mark the path as 
> infeasible. What prevents us from doing it right away here?
Before we didn't really care about constraints on the operands and I changed it 
:)
So, now `Intersect` (which is logically not a correct way to do what is meant) 
can cause this type of behaviour


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-05 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

vsavchenko wrote:
> NoQ wrote:
> > How can both of these be false? o.o
> Yeah :) I realized how weird it is.
> Anything is possible in the land of infeasible ranges.
> 
> I changed a comment there to address this
I mean, this pretty much never happened before. How are you not tripping on [[ 
https://github.com/llvm/llvm-project/blob/1a4421a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h#L100
 | this assert ]]? (probably it's simply been disabled in normal debug builds 
now that it's under "expensive checks")

The correct thing to do is to detect the paradox earlier and mark the path as 
infeasible. What prevents us from doing it right away here?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-05 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko marked an inline comment as done.
vsavchenko added inline comments.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

NoQ wrote:
> How can both of these be false? o.o
Yeah :) I realized how weird it is.
Anything is possible in the land of infeasible ranges.

I changed a comment there to address this


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-05 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko updated this revision to Diff 262170.
vsavchenko added a comment.

Add clarification on infeasible ranges in bitwise tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c

Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,22 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
+  //   Resulting ranges for the following cases are infeasible.
+  //   This is what causes paradoxical results below.
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
+  if (a < 10) {
+clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
+clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
+  }
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@
 }
 
 bool RangeSet::pin(llvm::APSInt , llvm::APSInt ) const {
+  if (isEmpty()) {
+// This range is already infeasible.
+return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a different pinning operation.
@@ -238,6 +250,190 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is not supported for advanced inference.
+// In this case, we simply backoff to the default "let's simply
+// infer the range from the expression's type".
+return infer(Sym->getType());
+  }
+
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitSymSymExpr(const SymSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+private:
+  SymbolicRangeInferrer(BasicValueFactory , RangeSet::Factory ,
+ProgramStateRef S)
+  

[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-04 Thread Gábor Horváth via Phabricator via cfe-commits
xazax.hun added inline comments.



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:385
+
+  RangeSet VisitAndOperator(RangeSet LHS, RangeSet RHS, QualType T) {
+// TODO: generalize for the ranged RHS.

I always get surprised when I read code like the one above seeing that only RHS 
is tested for being a concerte value. Later on, I vaguely start to remember 
that we only produce `SymIntExpr`s (is that correct?). I wonder if we should 
add an assert so this code blows up when someone is trying to add 
`IntSymExpr`s, so she will know what code needs modification. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-04 Thread Gábor Horváth via Phabricator via cfe-commits
xazax.hun added inline comments.



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:340-345
+// TODO #2: We didn't go into the nested expressions before, so it
+// might cause us spending much more time doing the inference.
+// This can be a problem for deeply nested expressions that are
+// involved in conditions and get tested continuously.  We definitely
+// need to address this issue and introduce some sort of caching
+// in here.

NoQ wrote:
> vsavchenko wrote:
> > NoQ wrote:
> > > I think this is a must-have, at least in some form. We've been exploding 
> > > like this before on real-world code, well, probably not with bitwise ops 
> > > but i'm still worried.
> > It will be pretty easy to introduce a limit on how deep we go into a tree 
> > of the given symbolic expression. That can also be a solution.
> I mean, doing something super trivial, like defining a map from symexprs to 
> ranges in `SymbolicRangeInferrer` itself and find-or-inserting into it, will 
> probably not be harder than counting depth(?)
I am a bit ignorant of this topic, but I wonder what a good caching mechanism 
would look like.
A simple `symexpr -> range` mapping does not feel right as the same symexpr 
might have a different range in a different program state (e.g., we might learn 
new ranges for our symbols). But having a separate map for each state state 
might do relatively little caching?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-04 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/test/Analysis/constant-folding.c:127-128
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }

How can both of these be false? o.o


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-04 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko updated this revision to Diff 261833.
vsavchenko marked an inline comment as done.
vsavchenko added a comment.

Now `getRange` is more likely to return unfeasible range.  Calling `Intersect` 
and `pin` methods from such ranges might cause a crash.
Check for unfeasible ranges.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c

Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,16 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // Check correct processing of unfeasible ranges
+  if (a > 10) {
+clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@
 }
 
 bool RangeSet::pin(llvm::APSInt , llvm::APSInt ) const {
+  if (isEmpty()) {
+// This range is already unfeasible.
+return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a different pinning operation.
@@ -238,6 +250,190 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is not supported for advanced inference.
+// In this case, we simply backoff to the default "let's simply
+// infer the range from the expression's type".
+return infer(Sym->getType());
+  }
+
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitSymSymExpr(const SymSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+private:
+  SymbolicRangeInferrer(BasicValueFactory , RangeSet::Factory ,
+ProgramStateRef S)
+  : ValueFactory(BV), RangeFactory(F), State(S) {}
+
+  /// Infer range information from the given integer constant.
+  ///
+  /// It's not a real "inference", but is 

[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-03 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:340-345
+// TODO #2: We didn't go into the nested expressions before, so it
+// might cause us spending much more time doing the inference.
+// This can be a problem for deeply nested expressions that are
+// involved in conditions and get tested continuously.  We definitely
+// need to address this issue and introduce some sort of caching
+// in here.

vsavchenko wrote:
> NoQ wrote:
> > I think this is a must-have, at least in some form. We've been exploding 
> > like this before on real-world code, well, probably not with bitwise ops 
> > but i'm still worried.
> It will be pretty easy to introduce a limit on how deep we go into a tree of 
> the given symbolic expression. That can also be a solution.
I mean, doing something super trivial, like defining a map from symexprs to 
ranges in `SymbolicRangeInferrer` itself and find-or-inserting into it, will 
probably not be harder than counting depth(?)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-02 Thread Denys Petrov via Phabricator via cfe-commits
ASDenysPetrov added a comment.

In D79232#2016065 , @NoQ wrote:

> @baloghadamsoftware @steakhal @ASDenysPetrov will you be able to plug D49074 
> /D50256 
> /D77792 
> /D77802 
> /D78933  
> into this so that to separate algebra from pattern-matching and ensure no 
> performance regressions? Or is something still missing?


Sure. I'll plug my changes D77802 /D78933 
 as soon as this patch is available in the 
master.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-02 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko marked 3 inline comments as done.
vsavchenko added a comment.

> Very nice, i like this architecture.

Aww, thanks 

> @baloghadamsoftware @steakhal @ASDenysPetrov will you be able to plug D49074 
> /D50256 
> /D77792 
> /D77802 
> /D78933  
> into this so that to separate algebra from pattern-matching and ensure no 
> performance regressions? Or is something still missing?

Yeah, I'll be happy to hear what will be good to have for all the different 
cases we have and might have in the future.




Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:271-281
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }

NoQ wrote:
> Can we replace these three with a single `VisitBinarySymExpr()`? Or is there 
> too much template duck typing involved?
Unfortunately no, we need to know more derived types in order to use `getLHS` 
and `getRHS` methods. And that's why `VisitBinaryOperator` function is a 
template.



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:340-345
+// TODO #2: We didn't go into the nested expressions before, so it
+// might cause us spending much more time doing the inference.
+// This can be a problem for deeply nested expressions that are
+// involved in conditions and get tested continuously.  We definitely
+// need to address this issue and introduce some sort of caching
+// in here.

NoQ wrote:
> I think this is a must-have, at least in some form. We've been exploding like 
> this before on real-world code, well, probably not with bitwise ops but i'm 
> still worried.
It will be pretty easy to introduce a limit on how deep we go into a tree of 
the given symbolic expression. That can also be a solution.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-02 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a subscriber: steakhal.
NoQ added a comment.

Very nice, i like this architecture.

@baloghadamsoftware @steakhal @ASDenysPetrov will you be able to plug D49074 
/D50256 
/D77792 
/D77802 
/D78933  into 
this so that to separate algebra from pattern-matching and ensure no 
performance regressions? Or is something still missing?




Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:271-281
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }

Can we replace these three with a single `VisitBinarySymExpr()`? Or is there 
too much template duck typing involved?



Comment at: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp:340-345
+// TODO #2: We didn't go into the nested expressions before, so it
+// might cause us spending much more time doing the inference.
+// This can be a problem for deeply nested expressions that are
+// involved in conditions and get tested continuously.  We definitely
+// need to address this issue and introduce some sort of caching
+// in here.

I think this is a must-have, at least in some form. We've been exploding like 
this before on real-world code, well, probably not with bitwise ops but i'm 
still worried.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79232/new/

https://reviews.llvm.org/D79232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D79232: [analyzer] Refactor range inference for symbolic expressions

2020-05-01 Thread Valeriy Savchenko via Phabricator via cfe-commits
vsavchenko created this revision.
vsavchenko added reviewers: NoQ, dcoughlin.
Herald added subscribers: cfe-commits, ASDenysPetrov, martong, Charusso, 
dkrupp, donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, szepet, 
baloghadamsoftware, xazax.hun.
Herald added a project: clang.

This change introduces a new component to unite all of the reasoning
we have about operations on ranges in the analyzer's solver.
In many cases, we might conclude that the range for a symbolic operation
is much more narrow than the type implies.  While reasoning about
runtime conditions (especially in loops), we need to support more and
more of those little pieces of logic.  The new component mostly plays
a role of an organizer for those, and allows us to focus on the actual
reasoning about ranges and not dispatching manually on the types of the
nested symbolic expressions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79232

Files:
  
clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
  clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
  clang/test/Analysis/constant-folding.c

Index: clang/test/Analysis/constant-folding.c
===
--- clang/test/Analysis/constant-folding.c
+++ clang/test/Analysis/constant-folding.c
@@ -115,7 +115,10 @@
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
 }
Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
===
--- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===--===//
+//   RangeSet implementation
+//===--===//
+
 void RangeSet::IntersectInRange(BasicValueFactory , Factory ,
-  const llvm::APSInt , const llvm::APSInt ,
-  PrimRangeSet , PrimRangeSet::iterator ,
-  PrimRangeSet::iterator ) const {
+const llvm::APSInt ,
+const llvm::APSInt ,
+PrimRangeSet ,
+PrimRangeSet::iterator ,
+PrimRangeSet::iterator ) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -238,6 +245,190 @@
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+: public SymExprVisitor {
+public:
+  static RangeSet inferRange(BasicValueFactory , RangeSet::Factory ,
+ ProgramStateRef State, SymbolRef Sym) {
+SymbolicRangeInferrer Inferrer(BV, F, State);
+return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+// If we got to this function, the actual type of the symbolic
+// expression is not supported for advanced inference.
+// In this case, we simply backoff to the default "let's simply
+// infer the range from the expression's type".
+return infer(Sym->getType());
+  }
+
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitSymSymExpr(const SymSymExpr *Sym) {
+return VisitBinaryOperator(Sym);
+  }
+
+private:
+  SymbolicRangeInferrer(BasicValueFactory , RangeSet::Factory ,
+ProgramStateRef S)
+  : ValueFactory(BV), RangeFactory(F), State(S) {}
+
+  /// Infer range information from the given integer constant.
+  ///
+  /// It's not a real "inference", but is here for operating with
+  /// sub-expressions in a more polymorphic