[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-21 Thread Donát Nagy via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3e014038b373: [analyzer] Improve underflow handling in 
ArrayBoundV2 (authored by donat.nagy).

Changed prior to commit:
  https://reviews.llvm.org/D157104?vs=549881&id=552025#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

Files:
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/test/Analysis/out-of-bounds.c


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -172,13 +172,18 @@
 return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa(SR)) {
-// A pointer to UnknownSpaceRegion may point to the middle of
-// an allocated region.
-
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa(Reg) && isa(Space))) {
+// A symbolic region in unknown space represents an unknown pointer that
+// may point into the middle of an array, so we don't look for underflows.
+// Both conditions are significant because we want to check underflows in
+// symbolic regions on the heap (which may be introduced by checkers like
+// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
+// non-symbolic regions (e.g. a field subregion of a symbolic region) in
+// unknown space.
 auto [state_precedesLowerBound, state_withinLowerBound] =
 compareValueToThreshold(state, ByteOffset,
 svalBuilder.makeZeroArrayIndex(), svalBuilder);
@@ -194,8 +199,7 @@
   }
 
   // CHECK UPPER BOUND
-  DefinedOrUnknownSVal Size =
-  getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+  DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
   if (auto KnownSize = Size.getAs()) {
 auto [state_withinUpperBound, state_exceedsUpperBound] =
 compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -172,13 +172,18 @@
 return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa(SR)) {
-// A pointer to UnknownSpaceRegion may point to the middle of
-// an allocated region.
-
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa(Reg) && isa(Space))) {
+// A symbolic region in unknown space represents a

[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-21 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy added a comment.

Yup, there was a superfluous line break that was corrected by git clang-format; 
thanks for bringing it to my attention. As this is a very trivial change, I'll 
merge the fixed commit directly, without uploading it into Phabricator.




Comment at: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp:202-203
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size =
-  getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+  getDynamicExtent(state, Reg, svalBuilder);
   if (auto KnownSize = Size.getAs()) {

This will be a single line in the commit that I'll merge.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-21 Thread Balázs Benics via Phabricator via cfe-commits
steakhal added a comment.

Hold on, make sure this is clang-format compliant before committing.
(Check buildkite)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-21 Thread Balázs Benics via Phabricator via cfe-commits
steakhal accepted this revision.
steakhal added a comment.

Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-21 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy added a comment.

A (somewhat delayed) ping to @steakhal because you asked me to tell you when I 
have the results. (If you've already seen the table that I uploaded on Friday, 
then you've nothing to do.)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-18 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy added a comment.

The results on open-source projects are depressing, but acceptable. This 
checker is looking for a serious defect, so it doesn't find any true positives 
on stable versions of open-source projects; however it produces a steady 
trickle of false positives because the Clang SA engine regularly misinterprets 
complicated code. As this patch allows this checker to analyze more situations, 
it introduces no true positives and a manageable amount of false positives (on 
average ~1/project).

Table of raw results:

| memcached | New reports 

   | Lost reports 

   | no change  
 |
| tmux  | New reports 

 | Lost reports 

 | no change
   |
| twin  | New reports 

   | Lost reports 

   | no change  
 |
| vim   | New reports 

   | Lost reports 

   | no change  
 |
| openssl   | New reports 

 | Lost reports 

 | no change   |
| sqlite| New reports 

   | Lost reports 

   | no change  
 |
| ffmpeg| New reports 

   | Lost reports 

   | four new reports (probably FPs), two of them 
are from the same macro|
| postgres  | New reports 

   | Lost reports 

   | two new false positives

[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-14 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy marked 2 inline comments as done.
donat.nagy added a comment.

I didn't upload the test results yet because right now there is some 
incompatibility between our local fork and the upstream. I hope that it'll be 
fixed soon, but until then I can't use our automated tools to run the analysis 
with revisions that are close to the upstream main branch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-14 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy updated this revision to Diff 549881.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

Files:
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/test/Analysis/out-of-bounds.c


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -172,13 +172,18 @@
 return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa(SR)) {
-// A pointer to UnknownSpaceRegion may point to the middle of
-// an allocated region.
-
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa(Reg) && isa(Space))) {
+// A symbolic region in unknown space represents an unknown pointer that
+// may point into the middle of an array, so we don't look for underflows.
+// Both conditions are significant because we want to check underflows in
+// symbolic regions on the heap (which may be introduced by checkers like
+// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
+// non-symbolic regions (e.g. a field subregion of a symbolic region) in
+// unknown space.
 auto [state_precedesLowerBound, state_withinLowerBound] =
 compareValueToThreshold(state, ByteOffset,
 svalBuilder.makeZeroArrayIndex(), svalBuilder);
@@ -195,7 +200,7 @@
 
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size =
-  getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+  getDynamicExtent(state, Reg, svalBuilder);
   if (auto KnownSize = Size.getAs()) {
 auto [state_withinUpperBound, state_exceedsUpperBound] =
 compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -172,13 +172,18 @@
 return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa(SR)) {
-// A pointer to UnknownSpaceRegion may point to the middle of
-// an allocated region.
-
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa(Reg) && isa(Space))) {
+// A symbolic region in unknown space represents an unknown pointer that
+// may point into the middle of an array, so we don't look for underflows.
+// Both conditions are significant because we want to check underflows in
+// symbolic regions on the heap (which may be introduced by checkers like
+// MallocChecker that call SValBuilder::get

[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-11 Thread Balázs Benics via Phabricator via cfe-commits
steakhal accepted this revision.
steakhal added a comment.

Looks safe and good. I'm interested in the diff though.
Let me know once you have the results.
I wanna have a look before we land this.




Comment at: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp:184
+// symbolic regions on the heap (which may be introduced by checkers that
+// call SValBuilder::getConjuredHeapSymbolVal()) and non-symbolic regions
+// (e.g. a field subregion of a symbolic region) in unknown space.

Such as the MallocChecker.



Comment at: clang/test/Analysis/out-of-bounds.c:176
 }
-

Try not to introduce unrelated hunks, as it might introduce more conflicts for 
downstream users than absolutely necessary.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-10 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 accepted this revision.
gamesh411 added a comment.
This revision is now accepted and ready to land.

Seems like a straightforward extension, LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-04 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy added reviewers: dkrupp, steakhal, Szelethus, gamesh411.
donat.nagy added a comment.

I'll try to upload results on open source projects, probably on Tuesday (I'm on 
vacation on Monday).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-04 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy added a comment.

Note: this commit is split off from the larger commit 
https://reviews.llvm.org/D150446, which combined this simple improvement with 
other, more complex changes that had problematic side-effects.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157104/new/

https://reviews.llvm.org/D157104

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157104: [analyzer] Improve underflow handling in ArrayBoundV2

2023-08-04 Thread Donát Nagy via Phabricator via cfe-commits
donat.nagy created this revision.
Herald added subscribers: steakhal, manas, ASDenysPetrov, martong, gamesh411, 
dkrupp, Szelethus, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, 
xazax.hun.
Herald added a reviewer: NoQ.
Herald added a project: All.
donat.nagy requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This minor change ensures that underflow errors are reported on arrays that are 
in unknown space but have well-defined size.

As a concrete example, the following test case did not produce a warning 
previously, but will produce a warning after this patch:

  typedef struct {
int id;
char name[256];
  } user_t;
  
  user_t *get_symbolic_user(void);
  char test_underflow_symbolic_2() {
user_t *user = get_symbolic_user();
return user->name[-1];
  }


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D157104

Files:
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/test/Analysis/out-of-bounds.c


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
@@ -173,4 +185,3 @@
   buf[x] = 1;
   clang_analyzer_eval(x <= 99); // expected-warning{{TRUE}}
 }
-
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -172,13 +172,17 @@
 return;
 
   NonLoc ByteOffset = RawOffset->getByteOffset();
+  const SubRegion *Reg = RawOffset->getRegion();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
-  if (!llvm::isa(SR)) {
-// A pointer to UnknownSpaceRegion may point to the middle of
-// an allocated region.
-
+  const MemSpaceRegion *Space = Reg->getMemorySpace();
+  if (!(isa(Reg) && isa(Space))) {
+// A symbolic region in unknown space represents an unknown pointer that
+// may point into the middle of an array, so we don't look for underflows.
+// Both conditions are significant because we want to check underflows in
+// symbolic regions on the heap (which may be introduced by checkers that
+// call SValBuilder::getConjuredHeapSymbolVal()) and non-symbolic regions
+// (e.g. a field subregion of a symbolic region) in unknown space.
 auto [state_precedesLowerBound, state_withinLowerBound] =
 compareValueToThreshold(state, ByteOffset,
 svalBuilder.makeZeroArrayIndex(), svalBuilder);
@@ -195,7 +199,7 @@
 
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size =
-  getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+  getDynamicExtent(state, Reg, svalBuilder);
   if (auto KnownSize = Size.getAs()) {
 auto [state_withinUpperBound, state_exceedsUpperBound] =
 compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);


Index: clang/test/Analysis/out-of-bounds.c
===
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -151,11 +151,23 @@
 // Don't warn when indexing below the start of a symbolic region's whose
 // base extent we don't know.
 int *get_symbolic(void);
-void test_index_below_symboloc(void) {
+void test_underflow_symbolic(void) {
   int *buf = get_symbolic();
   buf[-1] = 0; // no-warning;
 }
 
+// But warn if we understand the internal memory layout of a symbolic region.
+typedef struct {
+  int id;
+  char name[256];
+} user_t;
+
+user_t *get_symbolic_user(void);
+char test_underflow_symbolic_2() {
+  user_t *user = get_symbolic_user();
+  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
 void test_incomplete_struct(void) {
   extern struct incomplete incomplete;
   int *p = (int *)&incomplete;
@@ -173,4 +185,3 @@
   buf[x] = 1;
   clang_analyzer_eval(x <= 99); // expected-warning{{TRUE}}
 }
-
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===
--- clang/lib/StaticAnalyzer/Check