[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-30 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL352665: Add a new builtin: __builtin_dynamic_object_size 
(authored by epilk, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D56760?vs=181923=184352#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760

Files:
  cfe/trunk/docs/LanguageExtensions.rst
  cfe/trunk/include/clang/Basic/Builtins.def
  cfe/trunk/lib/AST/ExprConstant.cpp
  cfe/trunk/lib/Analysis/CFG.cpp
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp
  cfe/trunk/lib/CodeGen/CGCall.cpp
  cfe/trunk/lib/CodeGen/CodeGenFunction.h
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
  cfe/trunk/test/CodeGen/alloc-size.c
  cfe/trunk/test/CodeGen/object-size.c
  cfe/trunk/test/Sema/builtin-object-size.c

Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
===
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp
@@ -494,10 +494,11 @@
 llvm::Value *
 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
  llvm::IntegerType *ResType,
- llvm::Value *EmittedE) {
+ llvm::Value *EmittedE,
+ bool IsDynamic) {
   uint64_t ObjectSize;
   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
-return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
+return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
 }
 
@@ -513,7 +514,7 @@
 llvm::Value *
 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType,
-   llvm::Value *EmittedE) {
+   llvm::Value *EmittedE, bool IsDynamic) {
   // We need to reference an argument if the pointer is a parameter with the
   // pass_object_size attribute.
   if (auto *D = dyn_cast(E->IgnoreParenImpCasts())) {
@@ -549,7 +550,7 @@
   Value *Min = Builder.getInt1((Type & 2) != 0);
   // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
   Value *NullIsUnknown = Builder.getTrue();
-  Value *Dynamic = Builder.getFalse();
+  Value *Dynamic = Builder.getInt1(IsDynamic);
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
 }
 
@@ -1978,6 +1979,7 @@
   Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
 return RValue::get(Result);
   }
+  case Builtin::BI__builtin_dynamic_object_size:
   case Builtin::BI__builtin_object_size: {
 unsigned Type =
 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
@@ -1985,8 +1987,9 @@
 
 // We pass this builtin onto the optimizer so that it can figure out the
 // object size in more complex cases.
+bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
- /*EmittedE=*/nullptr));
+ /*EmittedE=*/nullptr, IsDynamic));
   }
   case Builtin::BI__builtin_prefetch: {
 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h
===
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h
@@ -4177,14 +4177,16 @@
   /// If EmittedExpr is non-null, this will use that instead of re-emitting E.
   llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType,
-   llvm::Value *EmittedE);
+   llvm::Value *EmittedE,
+   bool IsDynamic);
 
   /// Emits the size of E, as required by __builtin_object_size. This
   /// function is aware of pass_object_size parameters, and will act accordingly
   /// if E is a parameter with the pass_object_size attribute.
   llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type,
  llvm::IntegerType *ResType,
- llvm::Value *EmittedE);
+ llvm::Value *EmittedE,
+ bool IsDynamic);
 
 public:
 #ifndef NDEBUG
Index: cfe/trunk/lib/CodeGen/CGCall.cpp
===
--- cfe/trunk/lib/CodeGen/CGCall.cpp
+++ cfe/trunk/lib/CodeGen/CGCall.cpp
@@ -3459,7 +3459,8 

[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-29 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith accepted this revision.
rsmith added a comment.
This revision is now accepted and ready to land.

In D56760#1368279 , @erik.pilkington 
wrote:

> FWIW I'd prefer the __builtin_object_size spelling too, but it doesn't seem 
> like the GCC folks are super crazy about it to me. So it seems likely to me 
> that if we implement it it will just be a clang extension for at least the 
> medium term (if not permanently). I guess that's fine, so long as the GCC 
> people are aware that it would be bad to extend their builtin to use `type&4`.


In the absence of a commitment from the GCC folks, I think we should use the 
`__builtin_dynamic_object_size` approach for now. If they later change their 
mind we can deprecate that spelling in favor of a flag bit.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-23 Thread Erik Pilkington via Phabricator via cfe-commits
erik.pilkington added a comment.

In D56760#1368216 , @rsmith wrote:

> I don't see any evidence of that. Jakub said that modes 0-3 should stay 
> static, but that's in line with what we suggested.


I don't think Jakub really said anything about whether we should go with a new 
builtin or use the flag bit. I'm basing this on Jonathan Wakely's comment:

> I know Jakub is concerned about arbitrarily complex expressions, when
>  **__builtin_object_size is supposed to always be efficient and always
>  evaluate at compile time (which would imply the dynamic behaviour
>  should be a separate builtin, if it exists at all)**.

So your call. FWIW I'd prefer the __builtin_object_size spelling too, but it 
doesn't seem like the GCC folks are super crazy about it to me. So it seems 
likely to me that if we implement it it will just be a clang extension for at 
least the medium term (if not permanently). I guess that's fine, so long as the 
GCC people are aware that it would be bad to extend their builtin to use 
`type&4`.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-23 Thread JF Bastien via Phabricator via cfe-commits
jfb added a comment.

In D56760#1368216 , @rsmith wrote:

> In D56760#1368054 , @erik.pilkington 
> wrote:
>
> > So it seems like the GCC people want to keep `__builtin_object_size` static.
>
>
> I don't see any evidence of that. Jakub said that modes 0-3 should stay 
> static, but that's in line with what we suggested.


You'd originally asked:

> Would it make sense to model this as an (optional) extra flag bit for 
> `__builtin_object_size` rather than as a new builtin? It'd seem reasonable to 
> me to ask on the gcc dev list if they'd be interested in such an extension to 
> their builtin -- if not, then we should use a new name, and something like 
> this seems fine to me.

I agree Jakub wants to keep 0-3 static, but I didn't see a preference expressed 
for an extra bit versus `__builtin_dynamic_object_size`. Given this, which 
direction do you want to take?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-23 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

In D56760#1368054 , @erik.pilkington 
wrote:

> So it seems like the GCC people want to keep `__builtin_object_size` static.


I don't see any evidence of that. Jakub said that modes 0-3 should stay static, 
but that's in line with what we suggested.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-23 Thread Erik Pilkington via Phabricator via cfe-commits
erik.pilkington added a comment.

In D56760#1367915 , @jfb wrote:

> @rsmith, what do you think and how do you want to proceed? We think something 
> like what Erik implemented will catch things `_FORTIFY_SOURCE` currently 
> cannot. We agree there are valid code generation complexity concerns, yet it 
> seems like having a different spelling for the builtin helps mitigate those 
> concerns.


Yeah, I think the codegen explosion concerns are somewhat valid. It seems like 
for the most part its just a matter of keeping a value alive or doing a 
multiply or add here or there, which doesn't seem like the end of the world if 
its opt-in. The kind of pathological expressions that this is addressing seems 
like exactly the places where you would want the extra dynamic checks, like 
where you're indexing into an object with dynamically computed value with weird 
control flow or something. That being said, we could probably bail out of 
folding this in LLVM if the expression gets too complex.

So it seems like the GCC people want to keep `__builtin_object_size` static. In 
that case, I think that this current patch is the way to go. I'll post a patch 
to fix up pass_object_size too.

Thanks @jfb!


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-23 Thread JF Bastien via Phabricator via cfe-commits
jfb added a comment.

I pointed out this review to Jonathan Wakely, who posted it to the GCC list 
. Jakub replied with:

> The current modes (0-3) certainly must not be changed and must return a
>  constant, that is what huge amounts of code in the wild relies on.
> 
> The reason to choose constants only was the desire to make `_FORTIFY_SOURCE`
>  cheap at runtime.  For the dynamically computed expressions, the question
>  is how far it should go, how complex expressions it wants to build and how
>  much code and runtime can be spent on computing that.
> 
> The rationale for `__builtin_dynamic_object_size` lists only very simple
>  cases, where the builtin is just called on result of malloc, so that is
>  indeed easy, the argument is already evaluated before the malloc call, so
>  you can just save it in a temporary and use later.  Slightly more complex
>  is calloc, where you need to multiply two numbers (do you handle overflow
>  somehow, or not?).  But in real world, it can be arbitrarily more complex,
>  there can be pointer arithmetics with constant or variable offsets,
>  there can be conditional adjustments of pointers or PHIs with multiple
>  "dynamic" expressions for the sizes (shall the dynamic expression evaluate
>  as max over expressions for different phi arguments (that is essentially
>  what is done for the constant `__builtin_object_size`, but for dynamic
>  expressions max needs to be computed at runtime, or shall it reconstruct
>  the conditional or remember it and compute `whatever ? val1 : val2`),
>  loops which adjust pointers, etc. and all that can be done many times in
>  between where the objects are allocated and where the builtin is used.

@rsmith, what do you think and how do you want to proceed? We think something 
like what Erik implemented will catch things `_FORTIFY_SOURCE` currently 
cannot. We agree there are valid code generation complexity concerns, yet it 
seems like having a different spelling for the builtin helps mitigate those 
concerns.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-15 Thread George Burgess IV via Phabricator via cfe-commits
george.burgess.iv added a comment.

Thanks for this!

> Would it make sense to model this as an (optional) extra flag bit for 
> __builtin_object_size rather than as a new builtin?

+1. That way, we could avoid making a `pass_dynamic_object_size` (assuming we 
wouldn't want to have a different API between that and __builtin_object_size), 
as well. :)


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-15 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

Thanks, this seems like a useful feature to me.

Would it make sense to model this as an (optional) extra flag bit for 
`__builtin_object_size` rather than as a new builtin? It'd seem reasonable to 
me to ask on the gcc dev list if they'd be interested in such an extension to 
their builtin -- if not, then we should use a new name, and something like this 
seems fine to me.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56760/new/

https://reviews.llvm.org/D56760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56760: Add a new builtin: __builtin_dynamic_object_size

2019-01-15 Thread Erik Pilkington via Phabricator via cfe-commits
erik.pilkington created this revision.
erik.pilkington added reviewers: george.burgess.iv, ahatanak, rjmccall, rsmith.
Herald added subscribers: kristina, dexonsmith, jkorous.

This builtin has the same UI as `__builtin_object_size`, but has the potential 
to be evaluated dynamically. It is meant to be used as a drop-in replacement 
for libraries that use `__builtin_object_size` when a dynamic checking mode is 
enabled. For instance, `__builtin_object_size` fails to provide any extra 
checking in the following function:

  void f(size_t alloc) {
char* p = malloc(alloc);
strcpy(p, "foobar"); // expands to __builtin___strcpy_chk(p, "foobar", 
__builtin_object_size(p, 0))
  }

This is an overflow if `alloc < 7`, but because LLVM can't fold the object size 
intrinsic statically, it folds `__builtin_object_size` to `-1`. With 
__builtin_dynamic_object_size, `alloc` is passed through to 
`__builtin___strcpy_chk`.

rdar://problem/32212419 ER: evaluate builtin_objectsize (or a successor) at 
runtime, at least when alloc_size is available

Thanks for taking a look!
Erik


Repository:
  rC Clang

https://reviews.llvm.org/D56760

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/lib/AST/ExprConstant.cpp
  clang/lib/Analysis/CFG.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp
  clang/test/CodeGen/alloc-size.c
  clang/test/CodeGen/catch-undef-behavior.c
  clang/test/CodeGen/object-size.c
  clang/test/CodeGen/object-size.cpp
  clang/test/Sema/builtin-object-size.c

Index: clang/test/Sema/builtin-object-size.c
===
--- clang/test/Sema/builtin-object-size.c
+++ clang/test/Sema/builtin-object-size.c
@@ -1,22 +1,29 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
 // RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
+// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
+
+#ifndef DYNAMIC
+#define OBJECT_SIZE_BUILTIN __builtin_object_size
+#else
+#define OBJECT_SIZE_BUILTIN __builtin_dynamic_object_size
+#endif
 
 int a[10];
 
 int f0() {
-  return __builtin_object_size(); // expected-error {{too few arguments to function}}
+  return OBJECT_SIZE_BUILTIN(); // expected-error {{too few arguments to function}}
 }
 int f1() {
-  return (__builtin_object_size(, 0) + 
-  __builtin_object_size(, 1) + 
-  __builtin_object_size(, 2) + 
-  __builtin_object_size(, 3));
+  return (OBJECT_SIZE_BUILTIN(, 0) + 
+  OBJECT_SIZE_BUILTIN(, 1) + 
+  OBJECT_SIZE_BUILTIN(, 2) + 
+  OBJECT_SIZE_BUILTIN(, 3));
 }
 int f2() {
-  return __builtin_object_size(, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  return OBJECT_SIZE_BUILTIN(, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
 }
 int f3() {
-  return __builtin_object_size(, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  return OBJECT_SIZE_BUILTIN(, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
 
 
@@ -31,9 +38,9 @@
 void * memcset(void *restrict dst, int src, size_t n);
 void * memcpy(void *restrict dst, const void *restrict src, size_t n);
 
-#define memset(dest, src, len) __builtin___memset_chk(dest, src, len, __builtin_object_size(dest, 0))
-#define memcpy(dest, src, len) __builtin___memcpy_chk(dest, src, len, __builtin_object_size(dest, 0))
-#define memcpy1(dest, src, len) __builtin___memcpy_chk(dest, src, len, __builtin_object_size(dest, 4))
+#define memset(dest, src, len) __builtin___memset_chk(dest, src, len, OBJECT_SIZE_BUILTIN(dest, 0))
+#define memcpy(dest, src, len) __builtin___memcpy_chk(dest, src, len, OBJECT_SIZE_BUILTIN(dest, 0))
+#define memcpy1(dest, src, len) __builtin___memcpy_chk(dest, src, len, OBJECT_SIZE_BUILTIN(dest, 4))
 #define NULL ((void *)0)
 
 void f5(void)
@@ -49,8 +56,8 @@
 {
   char b[5];
   char buf[10];
-  __builtin___memccpy_chk (buf, b, '\0', sizeof(b), __builtin_object_size (buf, 0));
-  __builtin___memccpy_chk (b, buf, '\0', sizeof(buf), __builtin_object_size (b, 0));  // expected-warning {{'__builtin___memccpy_chk' will always overflow; destination buffer has size 5, but size argument is 10}}
+  __builtin___memccpy_chk (buf, b, '\0', sizeof(b), OBJECT_SIZE_BUILTIN (buf, 0));
+  __builtin___memccpy_chk (b, buf, '\0', sizeof(buf), OBJECT_SIZE_BUILTIN (b, 0));  // expected-warning {{'__builtin___memccpy_chk' will always overflow; destination buffer has size 5, but size argument is 10}}
 }
 
 int pr28314(void) {
@@ -70,10 +77,10 @@
   } *p3;
 
   int a = 0;
-  a += __builtin_object_size(>a, 0);
-  a += __builtin_object_size(p->b, 0);
-  a += __builtin_object_size(p2->b, 0);
-  a += __builtin_object_size(p3->b, 0);
+  a +=