[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-05 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL314975: [analyzer] Fix leak false positives on stuff put in 
C++/ObjC initializer lists. (authored by dergachev).

Changed prior to commit:
  https://reviews.llvm.org/D35216?vs=117699=117785#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D35216

Files:
  cfe/trunk/lib/StaticAnalyzer/Core/ExprEngine.cpp
  cfe/trunk/test/Analysis/initializer.cpp
  cfe/trunk/test/Analysis/objc-boxing.m
  cfe/trunk/test/Analysis/objc-for.m

Index: cfe/trunk/test/Analysis/initializer.cpp
===
--- cfe/trunk/test/Analysis/initializer.cpp
+++ cfe/trunk/test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,17 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  C empty{}; // no-crash
+
+  // Do not warn that 'x' leaks. It might have been deleted by
+  // the destructor of 'c'.
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: cfe/trunk/test/Analysis/objc-for.m
===
--- cfe/trunk/test/Analysis/objc-for.m
+++ cfe/trunk/test/Analysis/objc-for.m
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,osx.cocoa.Loops,debug.ExprInspection -verify %s
 
 void clang_analyzer_eval(int);
+void clang_analyzer_warnIfReached();
 
 #define nil ((id)0)
 
@@ -20,11 +21,13 @@
 @interface NSArray : NSObject 
 - (NSUInteger)count;
 - (NSEnumerator *)objectEnumerator;
++ (NSArray *)arrayWithObjects:(const id [])objects count:(NSUInteger)count;
 @end
 
 @interface NSDictionary : NSObject 
 - (NSUInteger)count;
 - (id)objectForKey:(id)key;
++ (id)dictionaryWithObjects:(const id [])objects forKeys:(const id /*  */ [])keys count:(NSUInteger)count;
 @end
 
 @interface NSDictionary (SomeCategory)
@@ -324,3 +327,19 @@
   for (id key in array)
 clang_analyzer_eval(0); // expected-warning{{FALSE}}
 }
+
+NSArray *globalArray;
+NSDictionary *globalDictionary;
+void boxedArrayEscape(NSMutableArray *array) {
+  if ([array count])
+return;
+  globalArray = @[array];
+  for (id key in array)
+clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+
+  if ([array count])
+return;
+  globalDictionary = @{ @"array" : array };
+  for (id key in array)
+clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+}
Index: cfe/trunk/test/Analysis/objc-boxing.m
===
--- cfe/trunk/test/Analysis/objc-boxing.m
+++ cfe/trunk/test/Analysis/objc-boxing.m
@@ -5,6 +5,16 @@
 typedef signed char BOOL;
 typedef long NSInteger;
 typedef unsigned long NSUInteger;
+
+@protocol NSObject
+@end
+@interface NSObject  {}
+@end
+@protocol NSCopying
+@end
+@protocol NSCoding
+@end
+
 @interface NSString @end
 @interface NSString (NSStringExtensionMethods)
 + (id)stringWithUTF8String:(const char *)nullTerminatedCString;
@@ -28,7 +38,15 @@
 + (NSNumber *)numberWithUnsignedInteger:(NSUInteger)value ;
 @end
 
+@interface NSValue : NSObject 
+- (void)getValue:(void *)value;
++ (NSValue *)valueWithBytes:(const void *)value
+   objCType:(const char *)type;
+@end
 
+typedef typeof(sizeof(int)) size_t;
+extern void *malloc(size_t);
+extern void free(void *);
 extern char *strdup(const char *str);
 
 id constant_string() {
@@ -39,6 +57,23 @@
 return @(strdup("boxed dynamic string")); // expected-warning{{Potential memory leak}}
 }
 
+typedef struct __attribute__((objc_boxable)) {
+  const char *str;
+} BoxableStruct;
+
+id leak_within_boxed_struct() {
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning
+  return val;
+}
+
+id leak_of_boxed_struct() {
+  BoxableStruct *bs = malloc(sizeof(BoxableStruct)); // The pointer stored in bs isn't owned by val.
+  NSValue *val = @(*bs); // expected-warning{{Potential leak of memory pointed to by 'bs'}}
+  return val;
+}
+
 id const_char_pointer(int *x) {
   if (x)
 return @(3);
Index: cfe/trunk/lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- cfe/trunk/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ cfe/trunk/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -827,6 +827,21 @@
   }
 }
 
+namespace {
+class CollectReachableSymbolsCallback final : public SymbolVisitor {
+  InvalidatedSymbols Symbols;
+
+public:
+  explicit 

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-04 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin added a comment.

In https://reviews.llvm.org/D35216#888506, @NoQ wrote:

> Do you think this patch should be blocked in favor of complete modeling?


Please, let's get this landed. We can add more precise modeling when the need 
arises.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-04 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In https://reviews.llvm.org/D35216#886212, @rsmith wrote:

> This is precisely how the rest of the compiler handles 
> `CXXStdInitializerListExpr`


Wow. Cool. I'd see what I can do. Yeah, it seems that this is a great case for 
us to pattern-match the implementations as well (the problems are still there 
for other STL stuff).

Do you think this patch should be blocked? Or i can land that and follow up 
with complete modeling later; it'd be larger.




Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1132
+  for (auto Child : Ex->children()) {
+if (!Child)
+  continue;

dcoughlin wrote:
> Is this 'if' needed?
Not sure, wanted to be defensive. It seems that `CXXStdInitializerList` always 
contains a single child (`InitListExpr`) so it's irrelevant if the list is 
empty, while boxed expressions contain no children when empty, and hanging 
commas are ignored. Replaced with an assertion just in case.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-04 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: test/Analysis/objc-for.m:328
   for (id key in array)
 clang_analyzer_eval(0); // expected-warning{{FALSE}}
 }

I guess these old tests should be replaced with `warnIfReached()`.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-04 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ updated this revision to Diff 117699.
NoQ added a comment.
Herald added a subscriber: szepet.

Escape into array and dictionary literals, add relevant tests. Fix the null 
statement check.


https://reviews.llvm.org/D35216

Files:
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  test/Analysis/initializer.cpp
  test/Analysis/objc-boxing.m
  test/Analysis/objc-for.m

Index: test/Analysis/objc-for.m
===
--- test/Analysis/objc-for.m
+++ test/Analysis/objc-for.m
@@ -1,6 +1,7 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,osx.cocoa.Loops,debug.ExprInspection -verify %s
 
 void clang_analyzer_eval(int);
+void clang_analyzer_warnIfReached();
 
 #define nil ((id)0)
 
@@ -20,11 +21,13 @@
 @interface NSArray : NSObject 
 - (NSUInteger)count;
 - (NSEnumerator *)objectEnumerator;
++ (NSArray *)arrayWithObjects:(const id [])objects count:(NSUInteger)count;
 @end
 
 @interface NSDictionary : NSObject 
 - (NSUInteger)count;
 - (id)objectForKey:(id)key;
++ (id)dictionaryWithObjects:(const id [])objects forKeys:(const id /*  */ [])keys count:(NSUInteger)count;
 @end
 
 @interface NSDictionary (SomeCategory)
@@ -324,3 +327,19 @@
   for (id key in array)
 clang_analyzer_eval(0); // expected-warning{{FALSE}}
 }
+
+NSArray *globalArray;
+NSDictionary *globalDictionary;
+void boxedArrayEscape(NSMutableArray *array) {
+  if ([array count])
+return;
+  globalArray = @[array];
+  for (id key in array)
+clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+
+  if ([array count])
+return;
+  globalDictionary = @{ @"array" : array };
+  for (id key in array)
+clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+}
Index: test/Analysis/objc-boxing.m
===
--- test/Analysis/objc-boxing.m
+++ test/Analysis/objc-boxing.m
@@ -5,6 +5,16 @@
 typedef signed char BOOL;
 typedef long NSInteger;
 typedef unsigned long NSUInteger;
+
+@protocol NSObject
+@end
+@interface NSObject  {}
+@end
+@protocol NSCopying
+@end
+@protocol NSCoding
+@end
+
 @interface NSString @end
 @interface NSString (NSStringExtensionMethods)
 + (id)stringWithUTF8String:(const char *)nullTerminatedCString;
@@ -28,7 +38,15 @@
 + (NSNumber *)numberWithUnsignedInteger:(NSUInteger)value ;
 @end
 
+@interface NSValue : NSObject 
+- (void)getValue:(void *)value;
++ (NSValue *)valueWithBytes:(const void *)value
+   objCType:(const char *)type;
+@end
 
+typedef typeof(sizeof(int)) size_t;
+extern void *malloc(size_t);
+extern void free(void *);
 extern char *strdup(const char *str);
 
 id constant_string() {
@@ -39,6 +57,23 @@
 return @(strdup("boxed dynamic string")); // expected-warning{{Potential memory leak}}
 }
 
+typedef struct __attribute__((objc_boxable)) {
+  const char *str;
+} BoxableStruct;
+
+id leak_within_boxed_struct() {
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning
+  return val;
+}
+
+id leak_of_boxed_struct() {
+  BoxableStruct *bs = malloc(sizeof(BoxableStruct)); // The pointer stored in bs isn't owned by val.
+  NSValue *val = @(*bs); // expected-warning{{Potential leak of memory pointed to by 'bs'}}
+  return val;
+}
+
 id const_char_pointer(int *x) {
   if (x)
 return @(3);
Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,17 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  C empty{}; // no-crash
+
+  // Do not warn that 'x' leaks. It might have been deleted by
+  // the destructor of 'c'.
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -827,6 +827,21 @@
   }
 }
 
+namespace {
+class CollectReachableSymbolsCallback final : public SymbolVisitor {
+  InvalidatedSymbols Symbols;
+
+public:
+  explicit CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }
+
+  bool VisitSymbol(SymbolRef Sym) override {
+Symbols.insert(Sym);
+return true;
+  }
+};
+} // end anonymous namespace
+
 void ExprEngine::Visit(const Stmt 

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-04 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ marked 3 inline comments as done.
NoQ added a comment.

In https://reviews.llvm.org/D35216#886212, @rsmith wrote:

> This is precisely how the rest of the compiler handles 
> `CXXStdInitializerListExpr`


Wow. Cool. I'd see what I can do. Yeah, it seems that this is a great case for 
us to pattern-match the implementations as well (the problems are still there 
for other STL stuff).

Do you think this patch should be blocked in favor of complete modeling?
Or i can land that and follow up with complete modeling later; it'd be a 
larger, but not much to discuss indeed.
Or i can try always "inlining" initializer_list constructor and methods during 
analysis, if all methods are available in the header in all implementations.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-02 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

In https://reviews.llvm.org/D35216#856415, @NoQ wrote:

> We already have the object's fields in the AST, with its AST record layout, 
> however field names and layouts are implementation-dependent, and it is 
> unsafe to try to understand how the specific standard library 
> implementation's layout works and what specific private fields we see in the 
> AST mean.


This is precisely how the rest of the compiler handles 
`CXXStdInitializerListExpr`, is how it's intended to be used, and is really the 
only way it can work if you want to evaluate the members of 
`initializer_list` rather than hardcoding their meaning. Consumers of 
`CXXStdInitializerListExpr` are expected to handle two cases: 1) the class has 
no base classes and two fields of type `const T*` and `size_t` (begin and 
size), and 2) the class has no base classes and two fields, both of type `const 
T*` (begin and end). (Unfortunately, we reject all other cases during CodeGen 
rather than in Sema, so you may still see them for now.)


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-02 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin accepted this revision.
dcoughlin added inline comments.
This revision is now accepted and ready to land.



Comment at: test/Analysis/objc-boxing.m:66
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning

NoQ wrote:
> dcoughlin wrote:
> > In this case the duped string is not owned by `val`. NSValue doesn't take 
> > ownership of the string, so this *will* leak and we should warn about it.
> I mean, the pointer to the raw string is stored inside the `NSValue`, and can 
> be used or freed from there. The caller can free this string by looking into 
> the `val`, even though `val` itself won't release the pointer (i guess i 
> messed up the comment again). From MallocChecker's perspective, this is an 
> escape and no-warning. If we free the string in this function, it'd most 
> likely cause use-after-free in the caller.
> 
> I tested that the string is indeed not strduped during boxing:
> 
> **`$ cat test.m`**
> ```
> #import 
> 
> typedef struct __attribute__((objc_boxable)) {
>   const char *str;
> } BoxableStruct;
> 
> int main() {
>   BoxableStruct bs;
>   bs.str = strdup("dynamic string");
>   NSLog(@"%p\n", bs.str);
> 
>   NSValue *val = @(bs);
> 
>   BoxableStruct bs2;
>   [val getValue:];
>   NSLog(@"%p\n", bs2.str);
> 
>   return 0;
> }
> ```
> **`$ clang test.m -framework Foundation`**
> **`$ ./a.out`**
> ```
> 2017-10-02 17:56:00.004 a.out[17933:1083757] 0x7ffd23407380
> 2017-10-02 17:56:00.004 a.out[17933:1083757] 0x7ffd23407380
> ```
> So it's possible to retrieve the exact same pointer from the boxed value. So 
> if `val` is returned to the caller, like in the test, it shouldn't be freed.
> 
> If the `NSValue` itself dies and never escapes, then of course it's a leak, 
> but in order to see that we'd need to model contents of `NSValue`.
You're absolutely right about this. The documentation for NSValue even states: 
"Important: The NSValue object doesn’t copy the contents of the string, but the 
pointer itself. If you create an NSValue object with an allocated data item, 
don’t free the data’s memory while the NSValue object exists."

 Sorry! My mistake.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-10-02 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1127
+// only consist of ObjC objects, and escapes of ObjC objects
+// aren't so important (eg., retain count checker ignores them).
+if (isa(Ex) ||

dcoughlin wrote:
> Note that we do have other ObjC checkers that rely on escaping of ObjC 
> objects, such as the ObjCLoopChecker and ObjCDeallocChecker. I think having 
> the TODO is great, but I'd like you to remove the the bit about "escapes of 
> ObjC objects aren't so important".
Hmm, should have double-checked. Will fix.



Comment at: test/Analysis/objc-boxing.m:66
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning

dcoughlin wrote:
> In this case the duped string is not owned by `val`. NSValue doesn't take 
> ownership of the string, so this *will* leak and we should warn about it.
I mean, the pointer to the raw string is stored inside the `NSValue`, and can 
be used or freed from there. The caller can free this string by looking into 
the `val`, even though `val` itself won't release the pointer (i guess i messed 
up the comment again). From MallocChecker's perspective, this is an escape and 
no-warning. If we free the string in this function, it'd most likely cause 
use-after-free in the caller.

I tested that the string is indeed not strduped during boxing:

**`$ cat test.m`**
```
#import 

typedef struct __attribute__((objc_boxable)) {
  const char *str;
} BoxableStruct;

int main() {
  BoxableStruct bs;
  bs.str = strdup("dynamic string");
  NSLog(@"%p\n", bs.str);

  NSValue *val = @(bs);

  BoxableStruct bs2;
  [val getValue:];
  NSLog(@"%p\n", bs2.str);

  return 0;
}
```
**`$ clang test.m -framework Foundation`**
**`$ ./a.out`**
```
2017-10-02 17:56:00.004 a.out[17933:1083757] 0x7ffd23407380
2017-10-02 17:56:00.004 a.out[17933:1083757] 0x7ffd23407380
```
So it's possible to retrieve the exact same pointer from the boxed value. So if 
`val` is returned to the caller, like in the test, it shouldn't be freed.

If the `NSValue` itself dies and never escapes, then of course it's a leak, but 
in order to see that we'd need to model contents of `NSValue`.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-09-28 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin added a comment.

Artem: Sorry it too me so long to review this! For CXXStdInitializerListExpr 
this looks great. However, I don't think we want ObjCBoxedExprs to escape their 
arguments. It looks to me like these expressions copy their argument values and 
don't hold on to them.




Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1127
+// only consist of ObjC objects, and escapes of ObjC objects
+// aren't so important (eg., retain count checker ignores them).
+if (isa(Ex) ||

Note that we do have other ObjC checkers that rely on escaping of ObjC objects, 
such as the ObjCLoopChecker and ObjCDeallocChecker. I think having the TODO is 
great, but I'd like you to remove the the bit about "escapes of ObjC objects 
aren't so important".



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1129
+if (isa(Ex) ||
+(isa(Ex) &&
+ cast(Ex)->getSubExpr()->getType()->isRecordType()))

In general, I don't think we want things passed to ObjCBoxedExpr to escape. The 
boxed values are copied and encoded when they are boxed, so the boxed 
expression doesn't take ownership of them.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1132
+  for (auto Child : Ex->children()) {
+if (!Child)
+  continue;

Is this 'if' needed?



Comment at: test/Analysis/objc-boxing.m:66
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning

In this case the duped string is not owned by `val`. NSValue doesn't take 
ownership of the string, so this *will* leak and we should warn about it.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-09-25 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ updated this revision to Diff 116512.
NoQ added a comment.

Fix some comments in tests.

Devin: ok to commit? I hope i didn't miss anything in my reasoning about boxed 
expressions.


https://reviews.llvm.org/D35216

Files:
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  test/Analysis/initializer.cpp
  test/Analysis/objc-boxing.m

Index: test/Analysis/objc-boxing.m
===
--- test/Analysis/objc-boxing.m
+++ test/Analysis/objc-boxing.m
@@ -5,6 +5,16 @@
 typedef signed char BOOL;
 typedef long NSInteger;
 typedef unsigned long NSUInteger;
+
+@protocol NSObject
+@end
+@interface NSObject  {}
+@end
+@protocol NSCopying
+@end
+@protocol NSCoding
+@end
+
 @interface NSString @end
 @interface NSString (NSStringExtensionMethods)
 + (id)stringWithUTF8String:(const char *)nullTerminatedCString;
@@ -28,7 +38,15 @@
 + (NSNumber *)numberWithUnsignedInteger:(NSUInteger)value ;
 @end
 
+@interface NSValue : NSObject 
+- (void)getValue:(void *)value;
++ (NSValue *)valueWithBytes:(const void *)value
+   objCType:(const char *)type;
+@end
 
+typedef typeof(sizeof(int)) size_t;
+extern void *malloc(size_t);
+extern void free(void *);
 extern char *strdup(const char *str);
 
 id constant_string() {
@@ -39,6 +57,23 @@
 return @(strdup("boxed dynamic string")); // expected-warning{{Potential memory leak}}
 }
 
+typedef struct __attribute__((objc_boxable)) {
+  const char *str;
+} BoxableStruct;
+
+id leak_within_boxed_struct() {
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string shall be owned by val.
+  NSValue *val = @(bs); // no-warning
+  return val;
+}
+
+id leak_of_boxed_struct() {
+  BoxableStruct *bs = malloc(sizeof(BoxableStruct)); // The pointer stored in bs isn't owned by val.
+  NSValue *val = @(*bs); // expected-warning{{Potential leak of memory pointed to by 'bs'}}
+  return val;
+}
+
 id const_char_pointer(int *x) {
   if (x)
 return @(3);
Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,13 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -827,6 +827,21 @@
   }
 }
 
+namespace {
+class CollectReachableSymbolsCallback final : public SymbolVisitor {
+  InvalidatedSymbols Symbols;
+
+public:
+  explicit CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }
+
+  bool VisitSymbol(SymbolRef Sym) override {
+Symbols.insert(Sym);
+return true;
+  }
+};
+} // end anonymous namespace
+
 void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet ) {
   PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
@@ -1103,8 +1118,33 @@
 SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
-ProgramStateRef state = N->getState()->BindExpr(Ex, LCtx, result);
-Bldr2.generateNode(S, N, state);
+ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
+
+// Escape pointers passed into the list.
+// TODO: Ideally we also need to escape the elements of
+// ObjCArrayLiterals and ObjCDictionaryLiterals, however these
+// only consist of ObjC objects, and escapes of ObjC objects
+// aren't so important (eg., retain count checker ignores them).
+if (isa(Ex) ||
+(isa(Ex) &&
+ cast(Ex)->getSubExpr()->getType()->isRecordType()))
+  for (auto Child : Ex->children()) {
+if (!Child)
+  continue;
+
+SVal Val = State->getSVal(Child, LCtx);
+
+CollectReachableSymbolsCallback Scanner =
+State->scanReachableSymbols(
+Val);
+const InvalidatedSymbols  = Scanner.getSymbols();
+
+State = getCheckerManager().runCheckersForPointerEscape(
+State, EscapedSymbols,
+/*CallEvent*/ nullptr, 

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-30 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In https://reviews.llvm.org/D35216#856093, @rsmith wrote:

> The `CXXStdInitializerListExpr` node has pretty simple evaluation semantics: 
> it takes a glvalue array expression, and constructs a 
> `std::initializer_list` from it as if by filling in the two members with a 
> pointer to the array and either the size of the array or a pointer to its 
> end. Can we not model those semantics directly here?


Yeah, it's definitely an easy case to perform complete modeling. However, in 
the analyzer we still lack some infrastructure for C++ library modeling, which 
was discussed quite a lot above, and i feel we've run out of debating time and 
still need to fix the false positive. It's important to make our checker APIs 
more powerful before we jump into modeling myriads of STL classes. The main 
problem is how to represent the private fields in the 
`std::initializer_list` object in our symbolic memory model. Here's a quick 
summary of the above.

We already have the object's fields in the AST, with its AST record layout, 
however field names and layouts are implementation-dependent, and it is unsafe 
to try to understand how the specific standard library implementation's layout 
works and what specific private fields we see in the AST mean. Instead it seems 
safer to attach metadata to the object, which would contain values of these 
private fields - this is a common operation for the analyzer (eg. attach string 
length metadata to a null-terminated C-string).

However, because such metadata should be handled by an analyzer's checker (we 
cannot possibly model all APIs in the core, moving it into checkers is much 
more scalable and natural), and therefore such metadata would be private to the 
checker in one way or another, it makes checkers fully responsible for modeling 
such metadata. In particular, the very feature lack of which that was causing 
the false positive (expressing the fact that if the list is passed into an 
external function (eg. with no visible body available for analysis), objects 
stored in it are also "escaping" into a function and can be, for example, 
deallocated here, so other checkers need to be notified to stop tracking them) 
cannot currently be implemented on the checker side. If we let checkers express 
this, it'd require an additional checker callback boilerplate that would need 
to be repeated in every C++ checker, and we already have a lot of boilerplate. 
Additionally, when constructing the `initializer_list`'s elements, we already 
need to have the storage for them, which is hard to handle by the core when 
such storage is checker-specific.

Another solution would be to let the analyzer core help checkers with the 
simple boilerplate, which is a long-standing project that is to be took up. The 
only downside of this approach is that it might make checker APIs less 
omnipotent, because they can no longer maintain their metadata in arbitrary 
manners, but only in manners that are understandable by the core.

An alternative solution described here was to let the checkers produce 
"imaginary" fields within our symbolic memory model, with unknown offsets 
within the object (similar to objective-c instance variables) that would have 
benefits of no longer being implementation-defined. The difference with 
maintaining metadata is that most of the boilerplate would be handled by the 
memory model (RegionStore) rather than the checker, and the core would 
generally know that the pointers to the array are located within the object. 
However, this also raises multiple concerns, because such imaginary "ghost" 
fields may conflict with the real fields in weird manners, they may 
accidentally leak to user's diagnostics, and what not. And we still have the 
problem with checker-specific storage.

So these are the excuses :)


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-29 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

The `CXXStdInitializerListExpr` node has pretty simple evaluation semantics: it 
takes a glvalue array expression, and constructs a `std::initializer_list` 
from it as if by filling in the two members with a pointer to the array and 
either the size of the array or a pointer to its end. Can we not model those 
semantics directly here?


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-29 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ updated this revision to Diff 113101.
NoQ added a comment.

Fix a bit of ObjCBoxedExpr behavior.


https://reviews.llvm.org/D35216

Files:
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  test/Analysis/initializer.cpp
  test/Analysis/objc-boxing.m

Index: test/Analysis/objc-boxing.m
===
--- test/Analysis/objc-boxing.m
+++ test/Analysis/objc-boxing.m
@@ -5,6 +5,16 @@
 typedef signed char BOOL;
 typedef long NSInteger;
 typedef unsigned long NSUInteger;
+
+@protocol NSObject
+@end
+@interface NSObject  {}
+@end
+@protocol NSCopying
+@end
+@protocol NSCoding
+@end
+
 @interface NSString @end
 @interface NSString (NSStringExtensionMethods)
 + (id)stringWithUTF8String:(const char *)nullTerminatedCString;
@@ -28,7 +38,15 @@
 + (NSNumber *)numberWithUnsignedInteger:(NSUInteger)value ;
 @end
 
+@interface NSValue : NSObject 
+- (void)getValue:(void *)value;
++ (NSValue *)valueWithBytes:(const void *)value
+   objCType:(const char *)type;
+@end
 
+typedef typeof(sizeof(int)) size_t;
+extern void *malloc(size_t);
+extern void free(void *);
 extern char *strdup(const char *str);
 
 id constant_string() {
@@ -39,6 +57,23 @@
 return @(strdup("boxed dynamic string")); // expected-warning{{Potential memory leak}}
 }
 
+typedef struct __attribute__((objc_boxable)) {
+  const char *str;
+} BoxableStruct;
+
+id leak_within_boxed_struct() {
+  BoxableStruct bs;
+  bs.str = strdup("dynamic string"); // The duped string is now owned by val.
+  NSValue *val = @(bs); // no-warning
+  return val;
+}
+
+id leak_of_boxed_struct() {
+  BoxableStruct *bs = malloc(sizeof(BoxableStruct)); // This pointer isn't owned by val.
+  NSValue *val = @(*bs); // expected-warning{{Potential leak of memory pointed to by 'bs'}}
+  return val;
+}
+
 id const_char_pointer(int *x) {
   if (x)
 return @(3);
Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,13 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -827,6 +827,21 @@
   }
 }
 
+namespace {
+class CollectReachableSymbolsCallback final : public SymbolVisitor {
+  InvalidatedSymbols Symbols;
+
+public:
+  explicit CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }
+
+  bool VisitSymbol(SymbolRef Sym) override {
+Symbols.insert(Sym);
+return true;
+  }
+};
+} // end anonymous namespace
+
 void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet ) {
   PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
@@ -1103,8 +1118,33 @@
 SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
-ProgramStateRef state = N->getState()->BindExpr(Ex, LCtx, result);
-Bldr2.generateNode(S, N, state);
+ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
+
+// Escape pointers passed into the list.
+// TODO: Ideally we also need to escape the elements of
+// ObjCArrayLiterals and ObjCDictionaryLiterals, however these
+// only consist of ObjC objects, and escapes of ObjC objects
+// aren't so important (eg., retain count checker ignores them).
+if (isa(Ex) ||
+(isa(Ex) &&
+ cast(Ex)->getSubExpr()->getType()->isRecordType()))
+  for (auto Child : Ex->children()) {
+if (!Child)
+  continue;
+
+SVal Val = State->getSVal(Child, LCtx);
+
+CollectReachableSymbolsCallback Scanner =
+State->scanReachableSymbols(
+Val);
+const InvalidatedSymbols  = Scanner.getSymbols();
+
+State = getCheckerManager().runCheckersForPointerEscape(
+State, EscapedSymbols,
+/*CallEvent*/ nullptr, PSK_EscapeOther, nullptr);
+  }
+
+Bldr2.generateNode(S, N, State);
   }
 
   

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-29 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1104
+// expression classes separately.
+if (!isa(Ex))
+  for (auto Child : Ex->children()) {

dcoughlin wrote:
> What is special about ObjCBoxedExpr here? Naively I would have expected that 
> we'd want to keep the old behavior for ObjCArrayLiteral and ObjCDictionary as 
> well.
Yeah, i got it all wrong initially.

If a `char *` is boxed into `NSString`, it doesn't escape, even though `Val` 
would be the string pointer - which is what i wanted to express here; we 
already had a test for that.

Similarly, if, say, a C struct is boxed, the pointer to the struct doesn't 
escape. However, contents of the struct do escape! Conveniently, `Val` here 
would be the (lazy) compound value, similarly to `std::initializer_list`, so we 
don't need to explicitly avoid escaping the struct pointer itself.

If a C integer is boxed, nothing escapes, of course, until we implement the 
mythical non-pointer escape (eg. file descriptor integers in stream checker 
should escape when they get boxed).

For `ObjC{Array,Dictionary}Expr`essions, contents of the array/dictionary do 
actually escape. However, because only `NSObject`s can be within such boxed 
literals, and `RetainCountChecker` ignores escapes, it is largely irrelevant 
how we behave in this case - i've even no idea how to test that, so i guess it 
could be fixed later.

Reference: https://clang.llvm.org/docs/ObjectiveCLiterals.html


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-17 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin added a comment.

Other than my question above, this looks good to me.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-17 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin added inline comments.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:1104
+// expression classes separately.
+if (!isa(Ex))
+  for (auto Child : Ex->children()) {

What is special about ObjCBoxedExpr here? Naively I would have expected that 
we'd want to keep the old behavior for ObjCArrayLiteral and ObjCDictionary as 
well.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-15 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ updated this revision to Diff 56.
NoQ added a comment.

Actually update the diff.


https://reviews.llvm.org/D35216

Files:
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  test/Analysis/initializer.cpp

Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,13 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -806,6 +806,21 @@
   }
 }
 
+namespace {
+class CollectReachableSymbolsCallback final : public SymbolVisitor {
+  InvalidatedSymbols Symbols;
+
+public:
+  explicit CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }
+
+  bool VisitSymbol(SymbolRef Sym) override {
+Symbols.insert(Sym);
+return true;
+  }
+};
+} // end anonymous namespace
+
 void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet ) {
   PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
@@ -1082,8 +1097,28 @@
 SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
-ProgramStateRef state = N->getState()->BindExpr(Ex, LCtx, result);
-Bldr2.generateNode(S, N, state);
+ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
+
+// FIXME: Remove this first check when we begin modelling these
+// expression classes separately.
+if (!isa(Ex))
+  for (auto Child : Ex->children()) {
+if (!Child)
+  continue;
+
+SVal Val = State->getSVal(Child, LCtx);
+
+CollectReachableSymbolsCallback Scanner =
+State->scanReachableSymbols(
+Val);
+const InvalidatedSymbols  = Scanner.getSymbols();
+
+State = getCheckerManager().runCheckersForPointerEscape(
+State, EscapedSymbols,
+/*CallEvent*/ nullptr, PSK_EscapeOther, nullptr);
+  }
+
+Bldr2.generateNode(S, N, State);
   }
 
   getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
@@ -2217,21 +2252,6 @@
   getCheckerManager().runCheckersForPostStmt(Dst, AfterInvalidateSet, AE, *this);
 }
 
-namespace {
-class CollectReachableSymbolsCallback final : public SymbolVisitor {
-  InvalidatedSymbols Symbols;
-
-public:
-  CollectReachableSymbolsCallback(ProgramStateRef State) {}
-  const InvalidatedSymbols () const { return Symbols; }
-
-  bool VisitSymbol(SymbolRef Sym) override {
-Symbols.insert(Sym);
-return true;
-  }
-};
-} // end anonymous namespace
-
 // A value escapes in three possible cases:
 // (1) We are binding to something that is not a memory region.
 // (2) We are binding to a MemrRegion that does not have stack storage.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-08-15 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ marked an inline comment as done.
NoQ added a comment.

We've discussed it in person with Devin, and he provided more points to think 
about:

- If the initializer list consists of non-POD data, constructors of list's 
objects need to take the sub-region of the list's region as `this`-region In 
the current (v2) version of this patch, these objects are constructed elsewhere 
and then trivial-copied into the list's metadata pointer region, which may be 
incorrect. This is our overall problem with C++ constructors, which manifests 
in this case as well. Additionally, objects would need to be constructed in the 
analyzer's core, which would not be able to predict that it needs to take a 
checker-specific region as `this`-region, which makes it harder, though it 
might be mitigated by sharing the checker state traits.

- Because "ghost variables" are not material to the user, we need to somehow 
make super sure that they don't make it into the diagnostic messages.

So, because this needs further digging into overall C++ support and rises too 
many questions, i'm delaying a better approach to this problem and will fall 
back to the original trivial patch.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-19 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

In https://reviews.llvm.org/D35216#814124, @dcoughlin wrote:

> In this case, I would be fine with some sort of AbstractStorageMemoryRegion 
> that meant "here is a memory region and somewhere reachable from here exists 
> another region of type T". Or even multiple regions with different 
> identifiers. This wouldn't specify how the memory is reachable, but it would 
> allow for transfer functions to get at those regions and it would allow for 
> invalidation.


Yeah, this is what we can easily implement now as a 
symbolic-region-based-on-a-metadata-symbol (though we can make a new region 
class for that if we eg. want it typed). The problem is that the relation 
between such storage region and its parent object region is essentially 
immaterial, similarly to the relation between SymbolRegionValue and its parent 
region. Region contents are mutable: today the abstract storage is reachable 
from its parent object, tomorrow it's not, and maybe something else becomes 
reachable, something that isn't even abstract. So the parent region for the 
abstract storage is most of the time at best a "nice to know" thing - we cannot 
rely on it to do any actual work. We'd anyway need to rely on the checker to do 
the job.

> For std::initializer_list this reachable region would the region for the 
> backing array and the transfer functions for begin() and end() yield the 
> beginning and end element regions for it.

So maybe in fact for std::initializer_list it may work fine because you cannot 
change the data after the object is constructed - so this region's contents are 
essentially immutable. For the future, i feel as if it is a dead end.

I'd like to consider another funny example. Suppose we're trying to model 
std::unique_ptr. Consider:

  void bar(const std::unique_ptr );
  
  void foo(std::unique_ptr ) {
int *a = x.get();   // (a, 0, direct): 
*a = 1; // (AbstractStorageRegion, 0, direct): 1 S32b
int *b = new int;
*b = 2; // (SymRegion{conj_$0}, 0 ,direct): 2 S32b
x.reset(b); // Checker map: x -> SymRegion{conj_$0}
bar(x); // 'a' doesn't escape (the pointer was unique), 'b' 
does.
clang_analyzer_eval(*a == 1); // Making this true is up to the checker.
clang_analyzer_eval(*b == 2); // Making this unknown is up to the checker.
  }

The checker doesn't totally need to ensure that `*a == 1` passes - even though 
the pointer was unique, it could theoretically have `.get()`-ed above and the 
code could of course break the uniqueness invariant (though we'd probably want 
it). The checker can say that "even if `*a` did escape, it was not because it 
was stuffed directly into `bar()`".

The checker's direct responsibility, however, is to solve the `*b == 2` thing 
(which is in fact the problem we're dealing with in this patch - escaping the 
storage region of the object).

So we're talking about one more operation over the program state (scanning 
reachable symbols and regions) that cannot work without checker support.

We can probably add a new callback "checkReachableSymbols" to solve this. This 
is in fact also related to the dead symbols problem (we're scanning for live 
symbols in the store and in the checkers separately, but we need to do so 
simultaneously with a single worklist). Hmm, in fact this sounds like a good 
idea; we can replace checkLiveSymbols with checkReachableSymbols.

Or we could just have ghost member variables, and no checker support required 
at all. For ghost member variables, the relation with their parent region 
(which would be their superregion) is actually useful, the mutability of their 
contents is expressed naturally, and the store automagically sees reachable 
symbols, live symbols, escapes, invalidations, whatever.

> In my view this differs from ghost variables in that (1) this storage does 
> actually exist (it is just a library implementation detail where that storage 
> lives) and (2) it is perfectly valid for a pointer into that storage to be 
> returned and for another part of the program to read or write from that 
> storage. (Well, in this case just read since it is allowed to be read-only 
> memory).
> 
> What I'm not OK with is modeling abstract analysis state (for example, the 
> count of a NSMutableArray or the typestate of a file handle) as a value 
> stored in some ginned up region in the store.This takes an easy problem that 
> the analyzer does well at (modeling typestate) and turns it into a hard one 
> that the analyzer is bad at (reasoning about the contents of the heap).

Yeah, i tend to agree on that. For simple typestates, this is probably an 
overkill, so let's definitely put aside the idea of "ghost symbolic regions" 
that i had earlier.

But, to summarize a bit, in our current case, however, the typestate we're 
looking for //**is**// the contents of the heap. And when we try to model such 
typestates (complex in this specific manner, i.e. heap-like) in any 

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-18 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin added a comment.

In this case, I would be fine with some sort of AbstractStorageMemoryRegion 
that meant "here is a memory region and somewhere reachable from here exists 
another region of type T". Or even multiple regions with different identifiers. 
This wouldn't specify how the memory is reachable, but it would allow for 
transfer functions to get at those regions and it would allow for invalidation.

For std::initializer_list this reachable region would the region for the 
backing array and the transfer functions for begin() and end() yield the 
beginning and end element regions for it.

In my view this differs from ghost variables in that (1) this storage does 
actually exist (it is just a library implementation detail where that storage 
lives) and (2) it is perfectly valid for a pointer into that storage to be 
returned and for another part of the program to read or write from that 
storage. (Well, in this case just read since it is allowed to be read-only 
memory).

What I'm not OK with is modeling abstract analysis state (for example, the 
count of a NSMutableArray or the typestate of a file handle) as a value stored 
in some ginned up region in the store. This takes an easy problem that the 
analyzer does well at (modeling typestate) and turns it into a hard one that 
the analyzer is bad at (reasoning about the contents of the heap).

I think the key criterion here is: "is the region accessible from outside the 
library". That is, does the library expose the region as a pointer that can be 
read to or written from in the client program? If so, then it makes sense for 
this to be in the store: we are modeling reachable storage as storage. But if 
we're just modeling arbitrary analysis facts that need to be invalidated when a 
pointer escapes then we shouldn't try to gin up storage for them just to get 
invalidation for free.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-18 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

> Approach (2): We could teach the Store to scan itself for bindings to 
> metadata-symbolic-based regions during scanReachableSymbols() whenever a 
> region turns out to be reachable. This requires no work on checker side, but 
> it sounds performance-heavy.

Nope, this approach is wrong. Metadata symbols may become out-of-date: when the 
object changes, metadata symbols attached to it aren't changing (because 
symbols simply don't change). The same metadata may have different symbols to 
denote its value in different moments of time, but at most one of them 
represents the actual metadata value. So we'd be escaping more stuff than 
necessary.

If only we had "ghost fields" 
(http://lists.llvm.org/pipermail/cfe-dev/2016-May/049000.html), it would have 
been much easier, because the ghost field would only contain the actual 
metadata, and the Store would always know about it. This example adds to my 
belief that ghost fields are exactly what we need for most C++ checkers.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-11 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

These are some great questions, i guess it'd be better to discuss them more 
openly. As a quick dump of my current mood:

- To me it seems obvious that we need  to aim for a checker API that is both 
simple //and// powerful. This can probably by keeping the API as powerful as 
necessary while providing a layer of simple ready-made solutions on top of it. 
Probably a few reusable components for assembling checkers. And this layer 
should ideally be pleasant enough to work with, so that people would prefer to 
extend it when something is lacking, instead of falling back to the complex 
omnipotent API. I'm thinking of AST matchers vs. AST visitors as a roughly 
similar situation: matchers are not omnipotent, but they're so nice.

- Separation between core and checkers is usually quite strange. Once we have 
shared state traits, i generally wouldn't mind having region store or range 
constraint manager as checkers (though it's probably not worth it to transform 
them - just a mood). The main thing to avoid here would be the situation when 
the checker overwrites stuff written by the core because it thinks it has a 
better idea what's going on, so the core should provide a good default behavior.

- Yeah, i totally care about performance as well, and if i try to implement 
approach, i'd make sure it's good.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-11 Thread Gábor Horváth via Phabricator via cfe-commits
xazax.hun added a comment.

At this point, I am a bit wondering about two questions.

- When should something belong to a checker and when should something belong to 
the engine? Sometimes we model library aspects in the engine and model language 
constructs in checkers.
- What is the checker programming model that we are aiming for? Maximum freedom 
or more easy checker development?

I think if we aim for maximum freedom, we do not need to worry about the 
potential stress on checkers, and we can introduce abstractions to mitigate 
that later on.
If we want to simplify the API, then maybe it makes more sense to move language 
construct modeling to the engine when the checker API is not sufficient instead 
of complicating the API.

Right now I have no preference or objections between the alternatives but there 
are some random thoughts:

- Maybe it would be great to have a guideline how to evolve the analyzer and 
follow it, so it can help us to decide in similar situations
- I do care about performance in this case. The reason is that we have a 
limited performance budget. And I think we should not expect most of the 
checker writers to add modeling of language constructs. So, in my opinion, it 
is ok to have less nice/more verbose API for language modeling if we can have 
better performance this way, since it only needs to be done once, and is done 
by the framework developers.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-10 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added a comment.

Or maybe it wasn't a good idea to make two diffs in one place. Dunno.




Comment at: test/Analysis/temporaries-callback-order.cpp:28
 // CHECK: Bind
-// CHECK-NEXT: RegionChanges
 

Apparently, this was caused by the check if the states are equal within 
`processPointerEscapedOnBind`, so it was a dry run for checkers.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-10 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:810
+public:
+  CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }

alexshap wrote:
> explicit ?
This code was moved from below, so i didn't bother changing it. Good point 
though.


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-10 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ updated this revision to Diff 105897.
NoQ edited the summary of this revision.
NoQ added a comment.
Herald added a subscriber: mgorny.

This other diff implements approach (1) through a draft of a checker (that 
doesn't model much yet). I had to additionally make sure we already have a 
region to construct metadata for, in spirit of https://reviews.llvm.org/D27202.

For easier comparison, i thought it'd be better to stick it to the same 
differential revision.


https://reviews.llvm.org/D35216

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/StdInitializerListChecker.cpp
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  lib/StaticAnalyzer/Core/ProgramState.cpp
  test/Analysis/initializer.cpp
  test/Analysis/temporaries-callback-order.cpp

Index: test/Analysis/temporaries-callback-order.cpp
===
--- test/Analysis/temporaries-callback-order.cpp
+++ test/Analysis/temporaries-callback-order.cpp
@@ -18,14 +18,12 @@
 
 void seeIfCheckBindWorks() {
   // This should trigger checkBind. The rest of the code shouldn't.
-  // This also triggers checkRegionChanges after that.
   // Note that this function is analyzed first, so the messages would be on top.
   int x = 1;
 }
 
 // seeIfCheckBindWorks():
 // CHECK: Bind
-// CHECK-NEXT: RegionChanges
 
 // testTemporaries():
 // CHECK-NEXT: RegionChanges
Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,cplusplus.StdInitializerList,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A {
   int x;
 public:
@@ -204,3 +206,13 @@
const char()[2];
 };
 }
+
+namespace CXX_initializer_lists {
+struct C {
+  C(std::initializer_list list);
+};
+void foo() {
+  int *x = new int;
+  C c{x}; // no-warning
+}
+}
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -116,13 +116,18 @@
   const LocationContext *LCtx,
   bool notifyChanges) const {
   ProgramStateManager  = getStateManager();
-  ProgramStateRef newState = makeWithStore(Mgr.StoreMgr->Bind(getStore(),
- LV, V));
+  ProgramStateRef NewState =
+  makeWithStore(Mgr.StoreMgr->Bind(getStore(), LV, V));
   const MemRegion *MR = LV.getAsRegion();
-  if (MR && Mgr.getOwningEngine() && notifyChanges)
-return Mgr.getOwningEngine()->processRegionChange(newState, MR, LCtx);
+  if (MR && Mgr.getOwningEngine() && notifyChanges) {
+NewState = Mgr.getOwningEngine()->processRegionChange(NewState, MR, LCtx);
+if (!NewState)
+  return nullptr;
+return Mgr.getOwningEngine()->processPointerEscapedOnBind(NewState, LV, V,
+  LCtx);
+  }
 
-  return newState;
+  return NewState;
 }
 
 ProgramStateRef ProgramState::bindDefault(SVal loc,
Index: lib/StaticAnalyzer/Core/ExprEngine.cpp
===
--- lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1074,12 +1074,19 @@
   for (ExplodedNodeSet::iterator it = preVisit.begin(), et = preVisit.end();
it != et; ++it) {
 ExplodedNode *N = *it;
+ProgramStateRef State = N->getState();
 const LocationContext *LCtx = N->getLocationContext();
-SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
-   resultType,
-   currBldrCtx->blockCount());
-ProgramStateRef state = N->getState()->BindExpr(Ex, LCtx, result);
-Bldr2.generateNode(S, N, state);
+SVal Result;
+if (isa(Ex)) {
+  const MemRegion *InitListRegion =
+  svalBuilder.getRegionManager().getCXXStaticTempObjectRegion(Ex);
+  Result = State->getSVal(InitListRegion);
+} else {
+  Result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx, resultType,
+currBldrCtx->blockCount());
+}
+State = State->BindExpr(Ex, LCtx, Result);
+Bldr2.generateNode(S, N, State);
   }
 
   getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
@@ -2232,7 +2239,7 @@
   // 

[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-10 Thread Alexander Shaposhnikov via Phabricator via cfe-commits
alexshap added inline comments.



Comment at: lib/StaticAnalyzer/Core/ExprEngine.cpp:810
+public:
+  CollectReachableSymbolsCallback(ProgramStateRef State) {}
+  const InvalidatedSymbols () const { return Symbols; }

explicit ?


https://reviews.llvm.org/D35216



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D35216: [analyzer] Escape symbols when creating std::initializer_list.

2017-07-10 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ created this revision.

I've seen a few false positives that appear because we construct C++11 
`std::initializer_list` objects with brace initializers, and such construction 
is not properly modeled. For instance, if a new object is constructed on the 
heap only to be put into a brace-initialized STL container, the object is 
reported to be leaked.

Approach (0): This can be trivially fixed by this patch, which causes pointers 
passed into initializer list expressions to immediately escape.

This fix is overly conservative though. So i did a bit of investigation as to 
how model `std::initializer_list` better.

According to the standard, `std::initializer_list` is an object that has 
methods `begin()`, `end()`, and `size()`, where `begin()` returns a pointer to 
continous array of `size()` objects of type `T`, and `end()` is equal to 
`begin()` plus `size()`. The standard does hint that it should be possible to 
implement `std::initializer_list` as a pair of pointers, or as a pointer and 
a size integer, however specific fields that the object would contain are an 
implementation detail.

Ideally, we should be able to model the initializer list's methods precisely. 
Or, at least, it should be possible to explain to the analyzer that the list 
somehow "takes hold" of the values put into it. Initializer lists can also be 
copied, which is a separate story that i'm not trying to address here.

The obvious approach to modeling `std::initializer_list` in a //checker// would 
be to construct a `SymbolMetadata` for the memory region of the initializer 
list object, which would be of type `T*` and represent `begin()`, so we'd 
trivially model `begin()` as a function that returns this symbol. The array 
pointed to by that symbol would be `bindLoc()`ed to contain the list's contents 
(probably as a `CompoundVal` to produce less bindings in the store). Extent of 
this array would represent `size()` and would be equal to the length of the 
list as written.

So this sounds good, however apparently it does nothing to address our false 
positives: when the list escapes, our `RegionStoreManager` is not magically 
guessing that the metadata symbol attached to it, together with its contents, 
should also escape. In fact, it's impossible to trigger a pointer escape from 
within the checker.

Approach (1): If only we enabled `ProgramState::bindLoc(..., 
notifyChanges=true)` to cause pointer escapes (not only region changes) (which 
sounds like the right thing to do anyway) such checker would be able to solve 
the false positives by triggering escapes when binding list elements to the 
list. However, it'd be as conservative as the current patch's solution. 
Ideally, we do not want escapes to happen so early. Instead, we'd prefer them 
to be delayed until the list itself escapes.

So i believe that escaping metadata symbols whenever their base regions escape 
would be the right thing to do. Currently we didn't think about that because we 
had neither pointer-type metadatas nor non-pointer escapes.

Approach (2): We could teach the Store to scan itself for bindings to 
metadata-symbolic-based regions during `scanReachableSymbols()` whenever a 
region turns out to be reachable. This requires no work on checker side, but it 
sounds performance-heavy.

Approach (3): We could let checkers maintain the set of active metadata symbols 
in the program state (ideally somewhere in the Store, which sounds weird but 
causes the smallest amount of layering violations), so that the core knew what 
to escape. This puts a stress on the checkers, but with a smart data map it 
wouldn't be a problem.

Approach (4): We could allow checkers to trigger pointer escapes in arbitrary 
moments. If we allow doing this within `checkPointerEscape` callback itself, we 
would be able to express facts like "when this region escapes, that metadata 
symbol attached to it should also escape". This sounds like an ultimate 
freedom, with maximum stress on the checkers - still not too much stress when 
we have smart data maps.

Does anybody like/hate any of these solutions or have anything better in mind? 
I'm personally liking the solution with `scanReachableSymbols()` - it should be 
possible to avoid performance overhead, and clarity seems nice.


https://reviews.llvm.org/D35216

Files:
  lib/StaticAnalyzer/Core/ExprEngine.cpp
  test/Analysis/initializer.cpp

Index: test/Analysis/initializer.cpp
===
--- test/Analysis/initializer.cpp
+++ test/Analysis/initializer.cpp
@@ -1,7 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,cplusplus.NewDeleteLeaks,debug.ExprInspection -analyzer-config c++-inlining=constructors -std=c++11 -verify %s
 
 void clang_analyzer_eval(bool);
 
+#include "Inputs/system-header-simulator-cxx.h"
+
 class A