[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-24 Thread Andrew via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG74f00516e5ce: [DFSAN] Add support for strsep. (authored by 
tkuchta, committed by browneee).
Herald added a subscriber: Sanitizers.

Changed prior to commit:
  https://reviews.llvm.org/D141389?vs=51=516515#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1630,6 +1630,51 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(p_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(p_s));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label),
+   dfsan_union(k_label, m_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, p_s);
+#endif
+
+  // taint the remaining string's pointer
+  char **pp_s = _s;
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, pp_s, sizeof(pp_s));
+
+  rv = strsep(pp_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -2044,6 +2089,7 @@
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();
Index: compiler-rt/lib/dfsan/done_abilist.txt
===
--- compiler-rt/lib/dfsan/done_abilist.txt
+++ compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,57 @@
   return const_cast(ret);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
+  dfsan_label s_label,
+  dfsan_label delim_label,
+  dfsan_label *ret_label) {
+  dfsan_label base_label = dfsan_read_label(s, sizeof(*s));
+  char *base = *s;
+  char *res = strsep(s, delim);
+  if (res != *s) {
+char *token_start = res;
+int token_length = strlen(res);
+// the delimiter byte has been set to NULL
+dfsan_set_label(0, token_start + token_length, 1);
+  }
+
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? base_label : 0;
+  } else {
+size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
+*ret_label = dfsan_union(
+dfsan_union(base_label, dfsan_read_label(base, sizeof(s_bytes_read))),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+dfsan_origin *ret_origin) {
+  dfsan_origin base_origin = dfsan_read_origin_of_first_taint(s, sizeof(*s));
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+  if (flags().strict_data_dependencies) {
+if (res)
+  *ret_origin = base_origin;
+  } else {
+if (*ret_label) {
+  if (base_origin) {
+*ret_origin = base_origin;
+  } else {
+dfsan_origin o =
+dfsan_read_origin_of_first_taint(delim, strlen(delim) + 1);
+*ret_origin = o ? o : (s_label ? s_origin : delim_origin);
+  }
+}
+  }
+
+  return res;
+}
+
 static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n,
  size_t *bytes_read) {
   const char *cs1 = (const char *) s1, *cs2 = (const char *) s2;

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-17 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Separate review.

Keep patches as small as possible, do one review for each patch.

If you'd like me to submit it for you, please provide your email to credit the 
patch to.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D148496: [compiler-rt] [test] Mark dfsan tests XFAIL on glibc-2.37

2023-04-17 Thread Andrew via Phabricator via cfe-commits
browneee accepted this revision.
browneee added a comment.

LGTM

`release_shadow_space.c:29: size_t get_rss_kb(): Assertion ```feof(f)' failed.`

`custom.cpp:1858: void test_sprintf(): Assertion ```strcmp(buf, "Hello world!") 
== 0' failed.`

Issue appears to be with the program rather than the instrumentation. Did the 
behavior of the the libc functions used in the test change?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D148496/new/

https://reviews.llvm.org/D148496

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-13 Thread Andrew via Phabricator via cfe-commits
browneee accepted this revision.
browneee added a comment.
This revision is now accepted and ready to land.

Please fix the additional &.

Do you need me to submit it for you?




Comment at: compiler-rt/test/dfsan/custom.cpp:1645
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(_s));
+

Remove & in sizeof.



Comment at: compiler-rt/test/dfsan/custom.cpp:1641
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(_delim));
+  // taint the string data bytes

browneee wrote:
> remove the &
> 
> It still works because `sizeof(pointer_var) == sizeof(_var)`, but it 
> doesn't logically match up like it should.
> 
> (Sorry, this one is my fault - I made this mistake giving an example in an 
> earlier comment.)
Remove & in sizeof.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? dfsan_read_label(base, sizeof(base)) : 0;
+  } else {

tkuchta wrote:
> browneee wrote:
> > `base, sizeof(base)` does not make sense - the size does not correspond to 
> > the pointer used.
> > 
> > Either
> > ```
> > dfsan_read_label(base, sizeof(*base))   // first byte pointed to by base
> > dfsan_read_label(base, strlen(base)) // whole string pointed to by base
> > dfsan_read_label(, sizeof(base))  // the base pointer
> > ```
> > 
> > In this case I think we want the base pointer.
> > 
> > `dfsan_read_label(, sizeof(base))`  // the base pointer
> > should be equivalent to doing
> > `dfsan_label base_label = dfsan_read_label(s, sizeof(*s))`  up at the start 
> > of the function, just after we declare base then use `base_label` here.
> > 
> > Lets go with the second option to avoid taking the address of a variable.
> > 
> > This is semantically equivalent to my first suggestion: 
> > `dfsan_get_label(base) == dfsan_read_label(, sizeof(base)) == 
> > base_label`.
> > Sorry I didn't consider the other constraints (no dfsan_get_label in this 
> > file because the pass is not run on this code; avoid taking address of 
> > variable) and suggest this in the first place.
> Thank you for your comments! Just to clarify:
> If we have strict data dependencies we would like *ret_label to be related to 
> the taints of the contents of the string, is that correct? Should we use the 
> base_label there too? My understanding is that base_label represents a taint 
> applied to the pointer, not to the data? 
> 
> In other words, would that be correct to:
> 1) use taints of the string data in the first case (strict dependencies) -> 
> therefore no base_label there as it represents the taint of the pointer
> 2) use the taints of the string data + the taints of the pointer in the 
> second case -> therefore using base_label there
> If we have strict data dependencies we would like *ret_label to be related to 
> the taints of the contents of the string, is that correct?

No, `*ret_label` holds the taint label for the return value, which is a 
pointer. The return value pointer is derived from `*s`, aka `base`. This is the 
pointer to the string contents, not the string contents themselves.

> Should we use the base_label there too?

I think we should only use `base_label` here.

> My understanding is that base_label represents a taint applied to the 
> pointer, not to the data?

Correct.


> In other words, would that be correct to:
> 1. use taints of the string data in the first case (strict dependencies) -> 
> therefore no base_label there as it represents the taint of the pointer

No, other way around. We want the base_label, but not the string data.

> 2. use the taints of the string data + the taints of the pointer in the 
> second case -> therefore using base_label there

Yes, we could have both base_label and the taints of the string data in the 
else case.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

We're getting really close!

Yes, that build error looks unrelated. Someone should fix it soon.




Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? dfsan_read_label(base, sizeof(base)) : 0;
+  } else {

`base, sizeof(base)` does not make sense - the size does not correspond to the 
pointer used.

Either
```
dfsan_read_label(base, sizeof(*base))   // first byte pointed to by base
dfsan_read_label(base, strlen(base)) // whole string pointed to by base
dfsan_read_label(, sizeof(base))  // the base pointer
```

In this case I think we want the base pointer.

`dfsan_read_label(, sizeof(base))`  // the base pointer
should be equivalent to doing
`dfsan_label base_label = dfsan_read_label(s, sizeof(*s))`  up at the start of 
the function, just after we declare base then use `base_label` here.

Lets go with the second option to avoid taking the address of a variable.

This is semantically equivalent to my first suggestion: `dfsan_get_label(base) 
== dfsan_read_label(, sizeof(base)) == base_label`.
Sorry I didn't consider the other constraints (no dfsan_get_label in this file 
because the pass is not run on this code; avoid taking address of variable) and 
suggest this in the first place.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:224
+*ret_label =
+dfsan_union(dfsan_read_label(base, sizeof(base)),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),

Also use `base_label` here.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:240
+if (res)
+  *ret_origin = dfsan_read_origin_of_first_taint(base, strlen(base));
+  } else {

`dfsan_get_origin(base) == dfsan_read_origin_of_first_taint(, 
sizeof(base))`

As noted above `base, strlen(base)` is a meaningfully valid pointer, length - 
but it is not the level of indirection we want here.

I think we want the same solution as above.

`dfsan_origin base_origin = dfsan_read_origin_of_first_taint(s, sizeof(*s))` at 
the start of the function, just after declaring and assigning base.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:243
+if (*ret_label) {
+  dfsan_origin o = dfsan_read_origin_of_first_taint(base, strlen(base));
+  if (o) {

Also use `base_origin` here.



Comment at: compiler-rt/test/dfsan/custom.cpp:1641
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(_delim));
+  // taint the string data bytes

remove the &

It still works because `sizeof(pointer_var) == sizeof(_var)`, but it 
doesn't logically match up like it should.

(Sorry, this one is my fault - I made this mistake giving an example in an 
earlier comment.)



Comment at: compiler-rt/test/dfsan/custom.cpp:1645
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(_s));
+

remove & in sizeof



Comment at: compiler-rt/test/dfsan/custom.cpp:1650
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, k_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);

The value `rv` has a data flow from the the string pointer, not the string 
bytes.
It should have `m_label` not `k_label`.

This is related to line 221 (using `base` instead of ``) in the other file.



Comment at: compiler-rt/test/dfsan/custom.cpp:1654
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), k_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *s);
+#endif

rv is a pointer, and I think it's origin should match the pointer s, not the 
bytes in the string.

I think this should be:

`ASSERT_INIT_ORIGIN_EQ_ORIGIN(, s);`

This is related to line 240 (`*ret_origin = 
dfsan_read_origin_of_first_taint(base, strlen(base));`) in the other file being 
the wrong level of indirection.


(Side note: the existing ASSERT_INIT_ORIGIN_EQ_ORIGIN macro feels a bit odd in 
that the arguments are at different levels of indirection - but not something 
to fix as part of this change)



Comment at: compiler-rt/test/dfsan/custom.cpp:1660
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, _s, sizeof(_s));
+

remove & in sizeof


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-02 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? s_label : 0;
+  } else {

tkuchta wrote:
> browneee wrote:
> > When `res != NULL`, then `res` is derived from `*s`, not from `s`.
> > 
> > e.g.
> > 
> > ```
> > *ret_label = res ? dfsan_get_label(base) : 0;
> > ```
> Apologies for a delay.
> 
> I came across some difficulty with using dfsan_get_label inside the 
> dfsan_custom.cpp file.
> It seems that including the dfsan_interface.h header there, which would be 
> needed for dfsan_get_label, causes other conflicts and build errors.
> Would there be another way to use that function inside dfsan_custom.cpp? 
```
char *base = *s;
```

Here base is loaded, so we can also load the shadow.

`dfsan_get_label(base) == dfsan_read_label(s, sizeof(*s))`

If base was an argument, then it would have a corresponding label argument as 
part of the wrapper function, so you also wouldn't need `dfsan_get_label`.


Repository:
  rCRT Compiler Runtime

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-07 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? s_label : 0;
+  } else {

When `res != NULL`, then `res` is derived from `*s`, not from `s`.

e.g.

```
*ret_label = res ? dfsan_get_label(base) : 0;
```



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:224
+size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
+*ret_label =
+dfsan_union(dfsan_read_label(base, s_bytes_read),

I think `dfsan_get_label(base)` should also be a part of this output.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:241
+if (res)
+  *ret_origin = s_origin;
+  } else {

As above, when res != NULL, then res is derived from *s, not from s.

```
if (res)
*ret_origin = dfsan_get_origin(base);
```



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:243
+  } else {
+if (*ret_label) {
+  size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;

As above, I think `dfsan_get_origin(base)` should be the first priority, as it 
is the origin with the most direct value flow.

Note that `dfsan_get_origin(base)` gives the origin id for `base` whereas 
`dfsan_read_origin_of_first_taint(base, ...)` gives origin id for `*base`.



Comment at: compiler-rt/test/dfsan/custom.cpp:1659
+
+  rv = strsep(pp_s, p_delim);
+

I think this test can call strsep fewer times. It just needs to set all the 
different labels first.

Different inputs to taint (each one should be a distinct taint label):
* `*p_delim`   (with `dfsan_set_label(..., p_delim, strlen(p_delim));`)
* `p_delim`   (with `dfsan_set_label(..., _delim, sizeof(_delim));`)
* `*s`   (with `dfsan_set_label(..., s, strlen(s));`)
* `s` aka `p_s`   (with `dfsan_set_label(..., , sizeof());` or 
`dfsan_set_label(..., _s, sizeof(_s));`)
* `pp_s` (with `dfsan_set_label(..., _s, sizeof(_s));`)

This is less than 8, so we can do it all at once (unless you want to test 
different taint labels on different bytes of the input strings).


Repository:
  rCRT Compiler Runtime

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-03 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

tkuchta wrote:
> browneee wrote:
> > tkuchta wrote:
> > > browneee wrote:
> > > > tkuchta wrote:
> > > > > browneee wrote:
> > > > > > The `s_label` represents the taint label for `s` (the pointer).
> > > > > > 
> > > > > > This line would clobber the taint label of the pointer (`s`) with a 
> > > > > > taint label from `s[0][0..n]`.
> > > > > > 
> > > > > > I think this line should be deleted.
> > > > > Agree, s_label represents the taint associated with the **s pointer. 
> > > > > However I am now wondering if that is the taint wich we would like to 
> > > > > return.
> > > > > For example, if we have
> > > > > if (flags().strict_data_dependencies) {
> > > > > *ret_label = res ? s_label : 0;
> > > > > 
> > > > > We would taint the return value with the value of the pointer, not 
> > > > > the data. It means that if we operate on a string for which the 
> > > > > characters are tainted, but the pointer itself isn't, we are likely 
> > > > > going to return label 0. My understanding was that we are more 
> > > > > concerned with the taint of the data, not the pointer, am I missing 
> > > > > something?
> > > > > 
> > > > Yes, we are usually more concerned with the taint of the data, not the 
> > > > pointer.
> > > > 
> > > > With strict dependencies:
> > > > // If the input pointer is tainted, the output pointer would be tainted 
> > > > (because it is derived from the input pointer - maybe the same value).
> > > > taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == 
> > > > ret_label[0]
> > > > 
> > > > // If the input data is tainted, the output data would be tainted 
> > > > (because it is derived from the input data).
> > > > taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
> > > > MEM_TO_SHADOW(ret)[0]
> > > > 
> > > > Because s[0] == ret  (or ret==null), (for the non-null case) the output 
> > > > shadow bytes are the same bytes as input shadow bytes and so these 
> > > > taint labels for the string data in shadow memory do not need to be 
> > > > explicitly propagated in this function. 
> > > > 
> > > > I think the only case actually changing/copying string data is writing 
> > > > a delimiter byte to NULL, which you handled.
> > > I am working on the changes and I came across a strange behavior that I 
> > > would like to ask about.
> > > 
> > > It turned out that if we do
> > > 
> > > char *s = " ... ";
> > > dfsan_set_label(label, _s, sizeof(_s));
> > > 
> > > Then, the s_label within the handler is 0, not "label". This is 
> > > unexpected, as we would like the pointer itself to be labeled here.
> > > I believe this has something to do with the fact that the input string in 
> > > strsep is a double pointer. For example this works as expected for the 
> > > delimiter string, which is a single pointer. 
> > > It's either I'm doing something incorrectly or there is some issue with 
> > > propagating labels for double pointers.
> > > Have you perhaps come across this behavior before?
> > I'm not sure what p_s is in your example. Could you provide a more complete 
> > example?
> > (and maybe all in one function, not half in __dfsw_strsep and half in 
> > another function)
> > 
> > Here is an example showing taint labels at different levels of indirection:
> > 
> > ```
> > #include 
> > #include 
> > #include 
> > 
> > int main(void) {
> >   char *s = " ... ";
> >   char **p_s = 
> >   char ***pp_s = _s;
> > 
> >   dfsan_label i_label = 1 << 0;
> >   dfsan_label j_label = 1 << 1;
> >   dfsan_label k_label = 1 << 2;
> >   dfsan_label m_label = 1 << 3;
> > 
> >   // string data
> >   dfsan_set_label(i_label, s, strlen(s));
> >   // pointer s
> >   dfsan_set_label(j_label, , sizeof(s));
> >   // pointer to pointer s
> >   dfsan_set_label(k_label, _s, sizeof(p_s));
> >   // pointer to pointer to pointer s
> >   dfsan_set_label(m_label, _s, sizeof(pp_s));
> > 
> >   assert(pp_s[0][0] == s);
> > 
> >   // string data
> >   assert(dfsan_read_label(s, strlen(s)) == i_label);
> >   // pointer s
> >   assert(dfsan_read_label(, sizeof(s)) == j_label);
> >   // pointer to pointer s
> >   assert(dfsan_read_label(_s, sizeof(p_s)) == k_label);
> >   // pointer to pointer to pointer s
> >   assert(dfsan_read_label(_s, sizeof(pp_s)) == m_label);
> > 
> >   return 0;
> > }
> > ```
> Hello,
> 
> Thank you for the comment.
> 
> I should have provided a more complete example.
> What I meant is a behavior I've found while working on the tests.
> In the test file I have something like that:
> 
> ```
> char *s = "Hello world/";
> char *delim = " /";
> dfsan_set_label(label, , sizeof());
> char *rv = strep(, delim);
>  ```
> 
> If I get this right, the line 
> ```
> dfsan_set_label(label, , sizeof());
> 
> ``` 
> should have applied a taint to the `s` 

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-02 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

tkuchta wrote:
> browneee wrote:
> > tkuchta wrote:
> > > browneee wrote:
> > > > The `s_label` represents the taint label for `s` (the pointer).
> > > > 
> > > > This line would clobber the taint label of the pointer (`s`) with a 
> > > > taint label from `s[0][0..n]`.
> > > > 
> > > > I think this line should be deleted.
> > > Agree, s_label represents the taint associated with the **s pointer. 
> > > However I am now wondering if that is the taint wich we would like to 
> > > return.
> > > For example, if we have
> > > if (flags().strict_data_dependencies) {
> > > *ret_label = res ? s_label : 0;
> > > 
> > > We would taint the return value with the value of the pointer, not the 
> > > data. It means that if we operate on a string for which the characters 
> > > are tainted, but the pointer itself isn't, we are likely going to return 
> > > label 0. My understanding was that we are more concerned with the taint 
> > > of the data, not the pointer, am I missing something?
> > > 
> > Yes, we are usually more concerned with the taint of the data, not the 
> > pointer.
> > 
> > With strict dependencies:
> > // If the input pointer is tainted, the output pointer would be tainted 
> > (because it is derived from the input pointer - maybe the same value).
> > taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == 
> > ret_label[0]
> > 
> > // If the input data is tainted, the output data would be tainted (because 
> > it is derived from the input data).
> > taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
> > MEM_TO_SHADOW(ret)[0]
> > 
> > Because s[0] == ret  (or ret==null), (for the non-null case) the output 
> > shadow bytes are the same bytes as input shadow bytes and so these taint 
> > labels for the string data in shadow memory do not need to be explicitly 
> > propagated in this function. 
> > 
> > I think the only case actually changing/copying string data is writing a 
> > delimiter byte to NULL, which you handled.
> I am working on the changes and I came across a strange behavior that I would 
> like to ask about.
> 
> It turned out that if we do
> 
> char *s = " ... ";
> dfsan_set_label(label, _s, sizeof(_s));
> 
> Then, the s_label within the handler is 0, not "label". This is unexpected, 
> as we would like the pointer itself to be labeled here.
> I believe this has something to do with the fact that the input string in 
> strsep is a double pointer. For example this works as expected for the 
> delimiter string, which is a single pointer. 
> It's either I'm doing something incorrectly or there is some issue with 
> propagating labels for double pointers.
> Have you perhaps come across this behavior before?
I'm not sure what p_s is in your example. Could you provide a more complete 
example?
(and maybe all in one function, not half in __dfsw_strsep and half in another 
function)

Here is an example showing taint labels at different levels of indirection:

```
#include 
#include 
#include 

int main(void) {
  char *s = " ... ";
  char **p_s = 
  char ***pp_s = _s;

  dfsan_label i_label = 1 << 0;
  dfsan_label j_label = 1 << 1;
  dfsan_label k_label = 1 << 2;
  dfsan_label m_label = 1 << 3;

  // string data
  dfsan_set_label(i_label, s, strlen(s));
  // pointer s
  dfsan_set_label(j_label, , sizeof(s));
  // pointer to pointer s
  dfsan_set_label(k_label, _s, sizeof(p_s));
  // pointer to pointer to pointer s
  dfsan_set_label(m_label, _s, sizeof(pp_s));

  assert(pp_s[0][0] == s);

  // string data
  assert(dfsan_read_label(s, strlen(s)) == i_label);
  // pointer s
  assert(dfsan_read_label(, sizeof(s)) == j_label);
  // pointer to pointer s
  assert(dfsan_read_label(_s, sizeof(p_s)) == k_label);
  // pointer to pointer to pointer s
  assert(dfsan_read_label(_s, sizeof(pp_s)) == m_label);

  return 0;
}
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-17 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

tkuchta wrote:
> browneee wrote:
> > The `s_label` represents the taint label for `s` (the pointer).
> > 
> > This line would clobber the taint label of the pointer (`s`) with a taint 
> > label from `s[0][0..n]`.
> > 
> > I think this line should be deleted.
> Agree, s_label represents the taint associated with the **s pointer. However 
> I am now wondering if that is the taint wich we would like to return.
> For example, if we have
> if (flags().strict_data_dependencies) {
> *ret_label = res ? s_label : 0;
> 
> We would taint the return value with the value of the pointer, not the data. 
> It means that if we operate on a string for which the characters are tainted, 
> but the pointer itself isn't, we are likely going to return label 0. My 
> understanding was that we are more concerned with the taint of the data, not 
> the pointer, am I missing something?
> 
Yes, we are usually more concerned with the taint of the data, not the pointer.

With strict dependencies:
// If the input pointer is tainted, the output pointer would be tainted 
(because it is derived from the input pointer - maybe the same value).
taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == ret_label[0]

// If the input data is tainted, the output data would be tainted (because it 
is derived from the input data).
taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
MEM_TO_SHADOW(ret)[0]

Because s[0] == ret  (or ret==null), (for the non-null case) the output shadow 
bytes are the same bytes as input shadow bytes and so these taint labels for 
the string data in shadow memory do not need to be explicitly propagated in 
this function. 

I think the only case actually changing/copying string data is writing a 
delimiter byte to NULL, which you handled.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-11 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Please send separate changes one by one (e.g. first patch would be for `strsep` 
alone and would include several functions of implementation `__dfsw_strsep` + 
`__dfso_strsep` + test code for this).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-10 Thread Andrew via Phabricator via cfe-commits
browneee requested changes to this revision.
browneee added a comment.
This revision now requires changes to proceed.

Hello! Thank you for the patch.

I think your changes for `__dfsw_strsep` are conflating the taint value of a 
pointer with the taint value of the value(s) it points to.
This is subtle - I believe I made the same mistake when I first worked with the 
DFSan code :)

With this in mind, please check the other functions.

Since this is a large patch and string manipulations need careful review, 
please send separate changes for each group of functions (e.g. first patch is 
`__dfsw_strsep` + `__dfso_strsep` + tests for this code).




Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

The `s_label` represents the taint label for `s` (the pointer).

This line would clobber the taint label of the pointer (`s`) with a taint label 
from `s[0][0..n]`.

I think this line should be deleted.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:214
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {
+char *token_start = res;

I think `res && (res != base)` is never true.

Checking an implementation of strsep 
(http://git.musl-libc.org/cgit/musl/tree/src/string/strsep.c)
`res` can either be `NULL` or the same value as `base`.

I think this line should be `(res != *s)`. This is different because `*s` may 
be changed by the call to `strsep` just above.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:239
+  char *base = *s;
+  s_origin = dfsan_read_origin_of_first_taint(base, strlen(base) + 1);
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);

Delete this line.

Like above, `s_origin` represents the origin value for the pointer `s`, not the 
origin value for for data in `base[0..n]`.



Comment at: compiler-rt/test/dfsan/custom.cpp:1693
+
+  dfsan_set_label(n_label, p_delim, sizeof(p_delim));
+

Also
dfsan_set_label(, _delim, sizeof(_delim));



Comment at: compiler-rt/test/dfsan/custom.cpp:1698
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else

Also
ASSERT_READ_ZERO_LABEL(rv, strlen(rv))



Comment at: compiler-rt/test/dfsan/custom.cpp:1700
+#else
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);

ASSERT_LABEL(rv, dfsan_union(some_different_label_x, n_label));



Comment at: compiler-rt/test/dfsan/custom.cpp:1704
+
+  dfsan_set_label(m_label, p_s, sizeof(p_s));
+  rv = strsep(_s, p_delim);

Also
dfsan_set_label(, _s, sizeof(_s));



Comment at: compiler-rt/test/dfsan/custom.cpp:1709
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);

Change this to

ASSERT_READ_LABEL(rv, strlen(rv), m_label);
ASSERT_LABEL(rv, some_different_label_y);



Comment at: compiler-rt/test/dfsan/custom.cpp:1712
+#else
+  ASSERT_LABEL(rv, dfsan_union(m_label, n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);

some_different_label_y would also be set on rv.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140743: [DFSan] Fix overuse of REQUIRES: in tests.

2022-12-29 Thread Andrew via Phabricator via cfe-commits
browneee abandoned this revision.
browneee added a comment.

Abandon due to D140744  being submitted.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140743/new/

https://reviews.llvm.org/D140743

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140743: [DFSan] Fix overuse of REQUIRES: in tests.

2022-12-28 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 485552.
browneee added a comment.

Fix other line number.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140743/new/

https://reviews.llvm.org/D140743

Files:
  clang/lib/Driver/ToolChains/Linux.cpp
  compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
  compiler-rt/test/dfsan/atomic.cpp
  compiler-rt/test/dfsan/basic.c
  compiler-rt/test/dfsan/conditional_callbacks.c
  compiler-rt/test/dfsan/conditional_callbacks_sig.c
  compiler-rt/test/dfsan/custom.cpp
  compiler-rt/test/dfsan/dfsan_get_track_origins.c
  compiler-rt/test/dfsan/event_callbacks.c
  compiler-rt/test/dfsan/fast8labels.c
  compiler-rt/test/dfsan/flags.c
  compiler-rt/test/dfsan/flush.c
  compiler-rt/test/dfsan/fncall.c
  compiler-rt/test/dfsan/force_zero.c
  compiler-rt/test/dfsan/fork.cpp
  compiler-rt/test/dfsan/gep.c
  compiler-rt/test/dfsan/interceptors.c
  compiler-rt/test/dfsan/libatomic.c
  compiler-rt/test/dfsan/lookup_table.c
  compiler-rt/test/dfsan/mmap_at_init.c
  compiler-rt/test/dfsan/origin_add_label.c
  compiler-rt/test/dfsan/origin_branch.c
  compiler-rt/test/dfsan/origin_disabled.c
  compiler-rt/test/dfsan/origin_id_stack_trace.c
  compiler-rt/test/dfsan/origin_invalid.c
  compiler-rt/test/dfsan/origin_ld_lost.c
  compiler-rt/test/dfsan/origin_ldst.c
  compiler-rt/test/dfsan/origin_limit.c
  compiler-rt/test/dfsan/origin_memcpy.c
  compiler-rt/test/dfsan/origin_memmove.c
  compiler-rt/test/dfsan/origin_memset.c
  compiler-rt/test/dfsan/origin_of_first_taint.c
  compiler-rt/test/dfsan/origin_overlapped.c
  compiler-rt/test/dfsan/origin_set_label.c
  compiler-rt/test/dfsan/origin_stack_trace.c
  compiler-rt/test/dfsan/origin_track_ld.c
  compiler-rt/test/dfsan/origin_unaligned_memtrans.c
  compiler-rt/test/dfsan/origin_untainted.c
  compiler-rt/test/dfsan/origin_with_sigactions.c
  compiler-rt/test/dfsan/origin_with_signals.cpp
  compiler-rt/test/dfsan/pair.cpp
  compiler-rt/test/dfsan/propagate.c
  compiler-rt/test/dfsan/pthread.c
  compiler-rt/test/dfsan/reaches_function.c
  compiler-rt/test/dfsan/release_shadow_space.c
  compiler-rt/test/dfsan/sigaction.c
  compiler-rt/test/dfsan/sigaction_stress_test.c
  compiler-rt/test/dfsan/stack_trace.c
  compiler-rt/test/dfsan/struct.c
  compiler-rt/test/dfsan/threaded_flush.c
  compiler-rt/test/dfsan/trace-cmp.c
  compiler-rt/test/dfsan/vararg.c
  compiler-rt/test/dfsan/write_callback.c

Index: compiler-rt/test/dfsan/write_callback.c
===
--- compiler-rt/test/dfsan/write_callback.c
+++ compiler-rt/test/dfsan/write_callback.c
@@ -1,6 +1,4 @@
 // RUN: %clang_dfsan %s -o %t && %run %t | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 // Tests that the custom implementation of write() does writes with or without
 // a callback set using dfsan_set_write_callback().
Index: compiler-rt/test/dfsan/vararg.c
===
--- compiler-rt/test/dfsan/vararg.c
+++ compiler-rt/test/dfsan/vararg.c
@@ -1,8 +1,6 @@
 // RUN: %clang_dfsan %s -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: %run %t foo
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 
Index: compiler-rt/test/dfsan/trace-cmp.c
===
--- compiler-rt/test/dfsan/trace-cmp.c
+++ compiler-rt/test/dfsan/trace-cmp.c
@@ -3,8 +3,6 @@
 //
 // RUN: %clang_dfsan -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/threaded_flush.c
===
--- compiler-rt/test/dfsan/threaded_flush.c
+++ compiler-rt/test/dfsan/threaded_flush.c
@@ -1,8 +1,6 @@
 // Tests that doing dfsan_flush() while another thread is executing doesn't
 // segfault.
 // RUN: %clang_dfsan %s -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/struct.c
===
--- compiler-rt/test/dfsan/struct.c
+++ compiler-rt/test/dfsan/struct.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan %s -O1 -o %t && %run %t
 // RUN: %clang_dfsan %s -O0 -DO0 -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/stack_trace.c
===
--- compiler-rt/test/dfsan/stack_trace.c
+++ compiler-rt/test/dfsan/stack_trace.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan -gmlt %s -o %t && %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
@@ -36,9 +34,9 @@
   printf("==OUTPUT==\n%s==EOS==\n", buf);
 
   // CHECK: ==OUTPUT==
-  // CHECK: #0 {{.*}} in bar.dfsan [[FILEPATH]]/stack_trace.c:15
-  // CHECK-COUNT-8: #{{[1-9]+}} {{.*}} in bar.dfsan [[FILEPATH]]/stack_trace.c:18
-  // 

[PATCH] D140743: [DFSan] Fix overuse of REQUIRES: in tests.

2022-12-28 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 485547.
browneee added a comment.

Fix updated line numbers in test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140743/new/

https://reviews.llvm.org/D140743

Files:
  clang/lib/Driver/ToolChains/Linux.cpp
  compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
  compiler-rt/test/dfsan/atomic.cpp
  compiler-rt/test/dfsan/basic.c
  compiler-rt/test/dfsan/conditional_callbacks.c
  compiler-rt/test/dfsan/conditional_callbacks_sig.c
  compiler-rt/test/dfsan/custom.cpp
  compiler-rt/test/dfsan/dfsan_get_track_origins.c
  compiler-rt/test/dfsan/event_callbacks.c
  compiler-rt/test/dfsan/fast8labels.c
  compiler-rt/test/dfsan/flags.c
  compiler-rt/test/dfsan/flush.c
  compiler-rt/test/dfsan/fncall.c
  compiler-rt/test/dfsan/force_zero.c
  compiler-rt/test/dfsan/fork.cpp
  compiler-rt/test/dfsan/gep.c
  compiler-rt/test/dfsan/interceptors.c
  compiler-rt/test/dfsan/libatomic.c
  compiler-rt/test/dfsan/lookup_table.c
  compiler-rt/test/dfsan/mmap_at_init.c
  compiler-rt/test/dfsan/origin_add_label.c
  compiler-rt/test/dfsan/origin_branch.c
  compiler-rt/test/dfsan/origin_disabled.c
  compiler-rt/test/dfsan/origin_id_stack_trace.c
  compiler-rt/test/dfsan/origin_invalid.c
  compiler-rt/test/dfsan/origin_ld_lost.c
  compiler-rt/test/dfsan/origin_ldst.c
  compiler-rt/test/dfsan/origin_limit.c
  compiler-rt/test/dfsan/origin_memcpy.c
  compiler-rt/test/dfsan/origin_memmove.c
  compiler-rt/test/dfsan/origin_memset.c
  compiler-rt/test/dfsan/origin_of_first_taint.c
  compiler-rt/test/dfsan/origin_overlapped.c
  compiler-rt/test/dfsan/origin_set_label.c
  compiler-rt/test/dfsan/origin_stack_trace.c
  compiler-rt/test/dfsan/origin_track_ld.c
  compiler-rt/test/dfsan/origin_unaligned_memtrans.c
  compiler-rt/test/dfsan/origin_untainted.c
  compiler-rt/test/dfsan/origin_with_sigactions.c
  compiler-rt/test/dfsan/origin_with_signals.cpp
  compiler-rt/test/dfsan/pair.cpp
  compiler-rt/test/dfsan/propagate.c
  compiler-rt/test/dfsan/pthread.c
  compiler-rt/test/dfsan/reaches_function.c
  compiler-rt/test/dfsan/release_shadow_space.c
  compiler-rt/test/dfsan/sigaction.c
  compiler-rt/test/dfsan/sigaction_stress_test.c
  compiler-rt/test/dfsan/stack_trace.c
  compiler-rt/test/dfsan/struct.c
  compiler-rt/test/dfsan/threaded_flush.c
  compiler-rt/test/dfsan/trace-cmp.c
  compiler-rt/test/dfsan/vararg.c
  compiler-rt/test/dfsan/write_callback.c

Index: compiler-rt/test/dfsan/write_callback.c
===
--- compiler-rt/test/dfsan/write_callback.c
+++ compiler-rt/test/dfsan/write_callback.c
@@ -1,6 +1,4 @@
 // RUN: %clang_dfsan %s -o %t && %run %t | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 // Tests that the custom implementation of write() does writes with or without
 // a callback set using dfsan_set_write_callback().
Index: compiler-rt/test/dfsan/vararg.c
===
--- compiler-rt/test/dfsan/vararg.c
+++ compiler-rt/test/dfsan/vararg.c
@@ -1,8 +1,6 @@
 // RUN: %clang_dfsan %s -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: %run %t foo
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 
Index: compiler-rt/test/dfsan/trace-cmp.c
===
--- compiler-rt/test/dfsan/trace-cmp.c
+++ compiler-rt/test/dfsan/trace-cmp.c
@@ -3,8 +3,6 @@
 //
 // RUN: %clang_dfsan -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/threaded_flush.c
===
--- compiler-rt/test/dfsan/threaded_flush.c
+++ compiler-rt/test/dfsan/threaded_flush.c
@@ -1,8 +1,6 @@
 // Tests that doing dfsan_flush() while another thread is executing doesn't
 // segfault.
 // RUN: %clang_dfsan %s -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/struct.c
===
--- compiler-rt/test/dfsan/struct.c
+++ compiler-rt/test/dfsan/struct.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan %s -O1 -o %t && %run %t
 // RUN: %clang_dfsan %s -O0 -DO0 -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/stack_trace.c
===
--- compiler-rt/test/dfsan/stack_trace.c
+++ compiler-rt/test/dfsan/stack_trace.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan -gmlt %s -o %t && %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
@@ -36,9 +34,9 @@
   printf("==OUTPUT==\n%s==EOS==\n", buf);
 
   // CHECK: ==OUTPUT==
-  // CHECK: #0 {{.*}} in bar.dfsan [[FILEPATH]]/stack_trace.c:15
+  // CHECK: #0 {{.*}} in bar.dfsan [[FILEPATH]]/stack_trace.c:11
   // 

[PATCH] D140743: [DFSan] Fix overuse of REQUIRES: in tests.

2022-12-28 Thread Andrew via Phabricator via cfe-commits
browneee created this revision.
browneee added a reviewer: MaskRay.
Herald added subscribers: Enna1, StephenFan.
Herald added a project: All.
browneee requested review of this revision.
Herald added projects: clang, Sanitizers.
Herald added subscribers: Sanitizers, cfe-commits.

Following suggestion on D140690 .


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140743

Files:
  clang/lib/Driver/ToolChains/Linux.cpp
  compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
  compiler-rt/test/dfsan/atomic.cpp
  compiler-rt/test/dfsan/basic.c
  compiler-rt/test/dfsan/conditional_callbacks.c
  compiler-rt/test/dfsan/conditional_callbacks_sig.c
  compiler-rt/test/dfsan/custom.cpp
  compiler-rt/test/dfsan/dfsan_get_track_origins.c
  compiler-rt/test/dfsan/event_callbacks.c
  compiler-rt/test/dfsan/fast8labels.c
  compiler-rt/test/dfsan/flags.c
  compiler-rt/test/dfsan/flush.c
  compiler-rt/test/dfsan/fncall.c
  compiler-rt/test/dfsan/force_zero.c
  compiler-rt/test/dfsan/fork.cpp
  compiler-rt/test/dfsan/gep.c
  compiler-rt/test/dfsan/interceptors.c
  compiler-rt/test/dfsan/libatomic.c
  compiler-rt/test/dfsan/lookup_table.c
  compiler-rt/test/dfsan/mmap_at_init.c
  compiler-rt/test/dfsan/origin_add_label.c
  compiler-rt/test/dfsan/origin_branch.c
  compiler-rt/test/dfsan/origin_disabled.c
  compiler-rt/test/dfsan/origin_id_stack_trace.c
  compiler-rt/test/dfsan/origin_invalid.c
  compiler-rt/test/dfsan/origin_ld_lost.c
  compiler-rt/test/dfsan/origin_ldst.c
  compiler-rt/test/dfsan/origin_limit.c
  compiler-rt/test/dfsan/origin_memcpy.c
  compiler-rt/test/dfsan/origin_memmove.c
  compiler-rt/test/dfsan/origin_memset.c
  compiler-rt/test/dfsan/origin_of_first_taint.c
  compiler-rt/test/dfsan/origin_overlapped.c
  compiler-rt/test/dfsan/origin_set_label.c
  compiler-rt/test/dfsan/origin_stack_trace.c
  compiler-rt/test/dfsan/origin_track_ld.c
  compiler-rt/test/dfsan/origin_unaligned_memtrans.c
  compiler-rt/test/dfsan/origin_untainted.c
  compiler-rt/test/dfsan/origin_with_sigactions.c
  compiler-rt/test/dfsan/origin_with_signals.cpp
  compiler-rt/test/dfsan/pair.cpp
  compiler-rt/test/dfsan/propagate.c
  compiler-rt/test/dfsan/pthread.c
  compiler-rt/test/dfsan/reaches_function.c
  compiler-rt/test/dfsan/release_shadow_space.c
  compiler-rt/test/dfsan/sigaction.c
  compiler-rt/test/dfsan/sigaction_stress_test.c
  compiler-rt/test/dfsan/stack_trace.c
  compiler-rt/test/dfsan/struct.c
  compiler-rt/test/dfsan/threaded_flush.c
  compiler-rt/test/dfsan/trace-cmp.c
  compiler-rt/test/dfsan/vararg.c
  compiler-rt/test/dfsan/write_callback.c

Index: compiler-rt/test/dfsan/write_callback.c
===
--- compiler-rt/test/dfsan/write_callback.c
+++ compiler-rt/test/dfsan/write_callback.c
@@ -1,6 +1,4 @@
 // RUN: %clang_dfsan %s -o %t && %run %t | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 // Tests that the custom implementation of write() does writes with or without
 // a callback set using dfsan_set_write_callback().
Index: compiler-rt/test/dfsan/vararg.c
===
--- compiler-rt/test/dfsan/vararg.c
+++ compiler-rt/test/dfsan/vararg.c
@@ -1,8 +1,6 @@
 // RUN: %clang_dfsan %s -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: %run %t foo
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 
Index: compiler-rt/test/dfsan/trace-cmp.c
===
--- compiler-rt/test/dfsan/trace-cmp.c
+++ compiler-rt/test/dfsan/trace-cmp.c
@@ -3,8 +3,6 @@
 //
 // RUN: %clang_dfsan -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/threaded_flush.c
===
--- compiler-rt/test/dfsan/threaded_flush.c
+++ compiler-rt/test/dfsan/threaded_flush.c
@@ -1,8 +1,6 @@
 // Tests that doing dfsan_flush() while another thread is executing doesn't
 // segfault.
 // RUN: %clang_dfsan %s -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/struct.c
===
--- compiler-rt/test/dfsan/struct.c
+++ compiler-rt/test/dfsan/struct.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan %s -O1 -o %t && %run %t
 // RUN: %clang_dfsan %s -O0 -DO0 -o %t && %run %t
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/stack_trace.c
===
--- compiler-rt/test/dfsan/stack_trace.c
+++ compiler-rt/test/dfsan/stack_trace.c
@@ -1,7 +1,5 @@
 // RUN: %clang_dfsan -gmlt %s -o %t && %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
-//
-// REQUIRES: x86_64-target-arch
 
 #include 
 #include 
Index: compiler-rt/test/dfsan/sigaction_stress_test.c

[PATCH] D140690: [compiler-rt][dfsan] Enable loongarch64 and add test support

2022-12-28 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

In D140690#4018043 , @SixWeining 
wrote:

> We notice that DFSan is a work in progress 
> , 
> currently under development for x86_64 Linux. So maybe we can defer the 
> change until DFSan development is finished and there is a use case on 
> loongarch64?

This sounds like a reasonable option.

DFSan development is now fairly mature. We don't support other platforms due to 
lack of demand (all the users I'm aware of are x86_64).

As a side note: I am pleased to see that the patch required to add this support 
is as small as I'd expect (even if we add the pieces in D140689 
 not shown here atm).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140690/new/

https://reviews.llvm.org/D140690

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140690: [compiler-rt][dfsan] Enable loongarch64 and add test support

2022-12-27 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Who will use DFSan on loongarch64?

Who will maintain DFSan on loongarch64?

Is there a loongarch64 buildbot?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140690/new/

https://reviews.llvm.org/D140690

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-08 Thread Andrew via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc7689fd552cd: [Clang] Fix memory leak due to 
TemplateArgumentListInfo used in AST node. (authored by browneee).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

Files:
  clang-tools-extra/clangd/AST.cpp
  clang/docs/ReleaseNotes.rst
  clang/include/clang/AST/DeclTemplate.h
  clang/include/clang/AST/TemplateBase.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/DeclTemplate.cpp
  clang/lib/AST/TemplateBase.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3801,13 +3801,15 @@
 return nullptr;
 
   // Substitute the current template arguments.
-  const TemplateArgumentListInfo  = D->getTemplateArgsInfo();
-  VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
-  VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
+  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
+  D->getTemplateArgsInfo()) {
+VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
+VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
-  if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
- VarTemplateArgsInfo))
-return nullptr;
+if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
+   TemplateArgs, VarTemplateArgsInfo))
+  return nullptr;
+  }
 
   // Check that the template argument list is well-formed for this template.
   SmallVector Converted;
@@ -5554,8 +5556,18 @@
 // declaration of the definition.
 TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
   TemplateArgs);
+
+TemplateArgumentListInfo TemplateArgInfo;
+if (const ASTTemplateArgumentListInfo *ArgInfo =
+VarSpec->getTemplateArgsInfo()) {
+  TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
+  TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
+  for (const TemplateArgumentLoc  : ArgInfo->arguments())
+TemplateArgInfo.addArgument(Arg);
+}
+
 Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl(
-VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
+VarSpec->getSpecializedTemplate(), Def, TemplateArgInfo,
 VarSpec->getTemplateArgs().asArray(), VarSpec));
 if (Var) {
   llvm::PointerUnion(List->getNumTemplateArgs());
+  void *Mem = C.Allocate(size, alignof(ASTTemplateArgumentListInfo));
+  return new (Mem) ASTTemplateArgumentListInfo(List);
+}
+
 ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
 const TemplateArgumentListInfo ) {
   LAngleLoc = Info.getLAngleLoc();
@@ -628,6 +639,17 @@
 new ([i]) TemplateArgumentLoc(Info[i]);
 }
 
+ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
+const ASTTemplateArgumentListInfo *Info) {
+  LAngleLoc = Info->getLAngleLoc();
+  RAngleLoc = Info->getRAngleLoc();
+  NumTemplateArgs = Info->getNumTemplateArgs();
+
+  TemplateArgumentLoc *ArgBuffer = getTrailingObjects();
+  for (unsigned i = 0; i != NumTemplateArgs; ++i)
+new ([i]) TemplateArgumentLoc((*Info)[i]);
+}
+
 void ASTTemplateKWAndArgsInfo::initializeFrom(
 SourceLocation TemplateKWLoc, const TemplateArgumentListInfo ,
 TemplateArgumentLoc *OutArgArray) {
Index: clang/lib/AST/DeclTemplate.cpp
===
--- clang/lib/AST/DeclTemplate.cpp
+++ clang/lib/AST/DeclTemplate.cpp
@@ -1335,10 +1335,14 @@
 
 void VarTemplateSpecializationDecl::setTemplateArgsInfo(
 const TemplateArgumentListInfo ) {
-  TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
-  TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
-  for (const TemplateArgumentLoc  : ArgsInfo.arguments())
-TemplateArgsInfo.addArgument(Loc);
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+const ASTTemplateArgumentListInfo *ArgsInfo) {
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
 }
 
 //===--===//
Index: clang/lib/AST/ASTImporter.cpp
===
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -6018,9 +6018,10 @@
   return TInfoOrErr.takeError();
 
 TemplateArgumentListInfo ToTAInfo;
-if (Error Err = ImportTemplateArgumentListInfo(
-

[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-08 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 435224.
browneee marked an inline comment as done.
browneee added a comment.

Added release notes and fixed variable naming.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

Files:
  clang-tools-extra/clangd/AST.cpp
  clang/docs/ReleaseNotes.rst
  clang/include/clang/AST/DeclTemplate.h
  clang/include/clang/AST/TemplateBase.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/DeclTemplate.cpp
  clang/lib/AST/TemplateBase.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3801,13 +3801,15 @@
 return nullptr;
 
   // Substitute the current template arguments.
-  const TemplateArgumentListInfo  = D->getTemplateArgsInfo();
-  VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
-  VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
+  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
+  D->getTemplateArgsInfo()) {
+VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
+VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
-  if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
- VarTemplateArgsInfo))
-return nullptr;
+if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
+   TemplateArgs, VarTemplateArgsInfo))
+  return nullptr;
+  }
 
   // Check that the template argument list is well-formed for this template.
   SmallVector Converted;
@@ -5554,8 +5556,18 @@
 // declaration of the definition.
 TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
   TemplateArgs);
+
+TemplateArgumentListInfo TemplateArgInfo;
+if (const ASTTemplateArgumentListInfo *ArgInfo =
+VarSpec->getTemplateArgsInfo()) {
+  TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
+  TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
+  for (const TemplateArgumentLoc  : ArgInfo->arguments())
+TemplateArgInfo.addArgument(Arg);
+}
+
 Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl(
-VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
+VarSpec->getSpecializedTemplate(), Def, TemplateArgInfo,
 VarSpec->getTemplateArgs().asArray(), VarSpec));
 if (Var) {
   llvm::PointerUnion(List->getNumTemplateArgs());
+  void *Mem = C.Allocate(size, alignof(ASTTemplateArgumentListInfo));
+  return new (Mem) ASTTemplateArgumentListInfo(List);
+}
+
 ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
 const TemplateArgumentListInfo ) {
   LAngleLoc = Info.getLAngleLoc();
@@ -628,6 +639,17 @@
 new ([i]) TemplateArgumentLoc(Info[i]);
 }
 
+ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
+const ASTTemplateArgumentListInfo *Info) {
+  LAngleLoc = Info->getLAngleLoc();
+  RAngleLoc = Info->getRAngleLoc();
+  NumTemplateArgs = Info->getNumTemplateArgs();
+
+  TemplateArgumentLoc *ArgBuffer = getTrailingObjects();
+  for (unsigned i = 0; i != NumTemplateArgs; ++i)
+new ([i]) TemplateArgumentLoc((*Info)[i]);
+}
+
 void ASTTemplateKWAndArgsInfo::initializeFrom(
 SourceLocation TemplateKWLoc, const TemplateArgumentListInfo ,
 TemplateArgumentLoc *OutArgArray) {
Index: clang/lib/AST/DeclTemplate.cpp
===
--- clang/lib/AST/DeclTemplate.cpp
+++ clang/lib/AST/DeclTemplate.cpp
@@ -1335,10 +1335,14 @@
 
 void VarTemplateSpecializationDecl::setTemplateArgsInfo(
 const TemplateArgumentListInfo ) {
-  TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
-  TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
-  for (const TemplateArgumentLoc  : ArgsInfo.arguments())
-TemplateArgsInfo.addArgument(Loc);
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+const ASTTemplateArgumentListInfo *ArgsInfo) {
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
 }
 
 //===--===//
Index: clang/lib/AST/ASTImporter.cpp
===
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -6018,9 +6018,10 @@
   return TInfoOrErr.takeError();
 
 TemplateArgumentListInfo ToTAInfo;
-if (Error Err = ImportTemplateArgumentListInfo(
-D->getTemplateArgsInfo(), ToTAInfo))
-  return std::move(Err);
+if (const ASTTemplateArgumentListInfo 

[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-07 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Bump


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-03 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: clang/include/clang/AST/DeclTemplate.h:2776
   void setTemplateArgsInfo(const TemplateArgumentListInfo );
+  void setTemplateArgsInfo(const ASTTemplateArgumentListInfo *ArgsInfo);
 

aaron.ballman wrote:
> Can we make it a prerequisite that the argument has to be nonnull and thus we 
> can pass in a const reference instead?
I tried this initially. Tests broke, so I went with this more conservative 
change. Someone may be able to reduce use of nulls in follow up changes.



Comment at: clang/include/clang/AST/DeclTemplate.h:2778-2779
 
-  const TemplateArgumentListInfo () const {
+  const ASTTemplateArgumentListInfo *getTemplateArgsInfo() const {
 return TemplateArgsInfo;
   }

aaron.ballman wrote:
> Do we want to assert that `TemplateArgsInfo` is nonnull and return a const 
> reference instead?
> 
> Basically, I'm wondering if we can remove a bunch of the pointers and go with 
> references as much as possible; it seems to me that a null pointer is a sign 
> of an error, same as with `TemplateArgs`.
This may be why https://reviews.llvm.org/D126937 is failing tests.

My goals is to fix the memory leak, so the sanitizer buildbots are clean. I 
would agree there is probably more cleanup to do for this member variable after 
https://reviews.llvm.org/D126944 - but fixing the memory leak is the urgent 
thing for me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-03 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 434135.
browneee marked 3 inline comments as done.
browneee added a comment.

Formatting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

Files:
  clang-tools-extra/clangd/AST.cpp
  clang/include/clang/AST/DeclTemplate.h
  clang/include/clang/AST/TemplateBase.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/DeclTemplate.cpp
  clang/lib/AST/TemplateBase.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3801,13 +3801,15 @@
 return nullptr;
 
   // Substitute the current template arguments.
-  const TemplateArgumentListInfo  = D->getTemplateArgsInfo();
-  VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
-  VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
+  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
+  D->getTemplateArgsInfo()) {
+VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
+VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
-  if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
- VarTemplateArgsInfo))
-return nullptr;
+if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
+   TemplateArgs, VarTemplateArgsInfo))
+  return nullptr;
+  }
 
   // Check that the template argument list is well-formed for this template.
   SmallVector Converted;
@@ -5554,8 +5556,18 @@
 // declaration of the definition.
 TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
   TemplateArgs);
+
+TemplateArgumentListInfo TemplateArgInfo;
+if (const ASTTemplateArgumentListInfo *ArgInfo =
+VarSpec->getTemplateArgsInfo()) {
+  TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
+  TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
+  for (const TemplateArgumentLoc  : ArgInfo->arguments())
+TemplateArgInfo.addArgument(arg);
+}
+
 Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl(
-VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
+VarSpec->getSpecializedTemplate(), Def, TemplateArgInfo,
 VarSpec->getTemplateArgs().asArray(), VarSpec));
 if (Var) {
   llvm::PointerUnion(List->getNumTemplateArgs());
+  void *Mem = C.Allocate(size, alignof(ASTTemplateArgumentListInfo));
+  return new (Mem) ASTTemplateArgumentListInfo(List);
+}
+
 ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
 const TemplateArgumentListInfo ) {
   LAngleLoc = Info.getLAngleLoc();
@@ -628,6 +639,17 @@
 new ([i]) TemplateArgumentLoc(Info[i]);
 }
 
+ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
+const ASTTemplateArgumentListInfo *Info) {
+  LAngleLoc = Info->getLAngleLoc();
+  RAngleLoc = Info->getRAngleLoc();
+  NumTemplateArgs = Info->getNumTemplateArgs();
+
+  TemplateArgumentLoc *ArgBuffer = getTrailingObjects();
+  for (unsigned i = 0; i != NumTemplateArgs; ++i)
+new ([i]) TemplateArgumentLoc((*Info)[i]);
+}
+
 void ASTTemplateKWAndArgsInfo::initializeFrom(
 SourceLocation TemplateKWLoc, const TemplateArgumentListInfo ,
 TemplateArgumentLoc *OutArgArray) {
Index: clang/lib/AST/DeclTemplate.cpp
===
--- clang/lib/AST/DeclTemplate.cpp
+++ clang/lib/AST/DeclTemplate.cpp
@@ -1335,10 +1335,14 @@
 
 void VarTemplateSpecializationDecl::setTemplateArgsInfo(
 const TemplateArgumentListInfo ) {
-  TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
-  TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
-  for (const TemplateArgumentLoc  : ArgsInfo.arguments())
-TemplateArgsInfo.addArgument(Loc);
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+const ASTTemplateArgumentListInfo *ArgsInfo) {
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
 }
 
 //===--===//
Index: clang/lib/AST/ASTImporter.cpp
===
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -6018,9 +6018,10 @@
   return TInfoOrErr.takeError();
 
 TemplateArgumentListInfo ToTAInfo;
-if (Error Err = ImportTemplateArgumentListInfo(
-D->getTemplateArgsInfo(), ToTAInfo))
-  return std::move(Err);
+if (const ASTTemplateArgumentListInfo *Args = D->getTemplateArgsInfo()) {
+  if (Error Err = 

[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-03 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 434075.
browneee added a comment.

Remove auto used under clang/. Format.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

Files:
  clang-tools-extra/clangd/AST.cpp
  clang/include/clang/AST/DeclTemplate.h
  clang/include/clang/AST/TemplateBase.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/DeclTemplate.cpp
  clang/lib/AST/TemplateBase.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3801,13 +3801,15 @@
 return nullptr;
 
   // Substitute the current template arguments.
-  const TemplateArgumentListInfo  = D->getTemplateArgsInfo();
-  VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
-  VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
+  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo =
+  D->getTemplateArgsInfo()) {
+VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
+VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
-  if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
- VarTemplateArgsInfo))
-return nullptr;
+if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
+   TemplateArgs, VarTemplateArgsInfo))
+  return nullptr;
+  }
 
   // Check that the template argument list is well-formed for this template.
   SmallVector Converted;
@@ -5554,8 +5556,19 @@
 // declaration of the definition.
 TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
   TemplateArgs);
+
+TemplateArgumentListInfo TemplateArgInfo;
+if (const ASTTemplateArgumentListInfo *ArgInfo =
+VarSpec->getTemplateArgsInfo()) {
+  TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
+  TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
+  for (const TemplateArgumentLoc  : ArgInfo->arguments()) {
+TemplateArgInfo.addArgument(arg);
+  }
+}
+
 Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl(
-VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
+VarSpec->getSpecializedTemplate(), Def, TemplateArgInfo,
 VarSpec->getTemplateArgs().asArray(), VarSpec));
 if (Var) {
   llvm::PointerUnion(List->getNumTemplateArgs());
+  void *Mem = C.Allocate(size, alignof(ASTTemplateArgumentListInfo));
+  return new (Mem) ASTTemplateArgumentListInfo(List);
+}
+
 ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
 const TemplateArgumentListInfo ) {
   LAngleLoc = Info.getLAngleLoc();
@@ -628,6 +639,17 @@
 new ([i]) TemplateArgumentLoc(Info[i]);
 }
 
+ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
+const ASTTemplateArgumentListInfo *Info) {
+  LAngleLoc = Info->getLAngleLoc();
+  RAngleLoc = Info->getRAngleLoc();
+  NumTemplateArgs = Info->getNumTemplateArgs();
+
+  TemplateArgumentLoc *ArgBuffer = getTrailingObjects();
+  for (unsigned i = 0; i != NumTemplateArgs; ++i)
+new ([i]) TemplateArgumentLoc((*Info)[i]);
+}
+
 void ASTTemplateKWAndArgsInfo::initializeFrom(
 SourceLocation TemplateKWLoc, const TemplateArgumentListInfo ,
 TemplateArgumentLoc *OutArgArray) {
Index: clang/lib/AST/DeclTemplate.cpp
===
--- clang/lib/AST/DeclTemplate.cpp
+++ clang/lib/AST/DeclTemplate.cpp
@@ -1335,10 +1335,14 @@
 
 void VarTemplateSpecializationDecl::setTemplateArgsInfo(
 const TemplateArgumentListInfo ) {
-  TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
-  TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
-  for (const TemplateArgumentLoc  : ArgsInfo.arguments())
-TemplateArgsInfo.addArgument(Loc);
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
+}
+
+void VarTemplateSpecializationDecl::setTemplateArgsInfo(
+const ASTTemplateArgumentListInfo *ArgsInfo) {
+  TemplateArgsInfo =
+  ASTTemplateArgumentListInfo::Create(getASTContext(), ArgsInfo);
 }
 
 //===--===//
Index: clang/lib/AST/ASTImporter.cpp
===
--- clang/lib/AST/ASTImporter.cpp
+++ clang/lib/AST/ASTImporter.cpp
@@ -6018,9 +6018,10 @@
   return TInfoOrErr.takeError();
 
 TemplateArgumentListInfo ToTAInfo;
-if (Error Err = ImportTemplateArgumentListInfo(
-D->getTemplateArgsInfo(), ToTAInfo))
-  return std::move(Err);
+if (const ASTTemplateArgumentListInfo *Args = D->getTemplateArgsInfo()) {
+  if (Error Err = 

[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-02 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

More details on the bug: https://reviews.llvm.org/D125802#3551305

Alternative implementation for fix: https://reviews.llvm.org/D126937

I'm not yet sure if these changes are correct.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126944/new/

https://reviews.llvm.org/D126944

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126937: Fix memleak in VarTemplateSpecializationDecl

2022-06-02 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

I was also looking into fixing this: https://reviews.llvm.org/D126944

I'm not yet sure if my changes are correct.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126937/new/

https://reviews.llvm.org/D126937

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126944: [Clang] Fix memory leak due to TemplateArgumentListInfo used in AST node.

2022-06-02 Thread Andrew via Phabricator via cfe-commits
browneee created this revision.
Herald added subscribers: usaxena95, kadircet, arphaman, martong.
Herald added a reviewer: shafik.
Herald added a project: All.
browneee requested review of this revision.
Herald added projects: clang, clang-tools-extra.
Herald added a subscriber: cfe-commits.

It looks like the leak is rooted at the allocation here:
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp#L3857

The VarTemplateSpecializationDecl is allocated using placement new which uses 
the AST structure for ownership: 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/lib/AST/DeclBase.cpp#L99

The problem is the TemplateArgumentListInfo inside 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/DeclTemplate.h#L2721
This object contains a vector which does not use placement new: 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/TemplateBase.h#L564

Apparently ASTTemplateArgumentListInfo should be used instead 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/TemplateBase.h#L575

https://reviews.llvm.org/D125802#3551305


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126944

Files:
  clang-tools-extra/clangd/AST.cpp
  clang/include/clang/AST/DeclTemplate.h
  clang/include/clang/AST/TemplateBase.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/DeclTemplate.cpp
  clang/lib/AST/TemplateBase.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3801,13 +3801,14 @@
 return nullptr;
 
   // Substitute the current template arguments.
-  const TemplateArgumentListInfo  = D->getTemplateArgsInfo();
-  VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
-  VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
+  if (const ASTTemplateArgumentListInfo *TemplateArgsInfo = D->getTemplateArgsInfo()) {
+VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo->getLAngleLoc());
+VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo->getRAngleLoc());
 
-  if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
- VarTemplateArgsInfo))
-return nullptr;
+if (SemaRef.SubstTemplateArguments(TemplateArgsInfo->arguments(),
+   TemplateArgs, VarTemplateArgsInfo))
+  return nullptr;
+  }
 
   // Check that the template argument list is well-formed for this template.
   SmallVector Converted;
@@ -5554,8 +,18 @@
 // declaration of the definition.
 TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
   TemplateArgs);
+
+TemplateArgumentListInfo TemplateArgInfo;
+if (auto *ArgInfo = VarSpec->getTemplateArgsInfo()) {
+  TemplateArgInfo.setLAngleLoc(ArgInfo->getLAngleLoc());
+  TemplateArgInfo.setRAngleLoc(ArgInfo->getRAngleLoc());
+  for (const TemplateArgumentLoc& arg : ArgInfo->arguments()) {
+TemplateArgInfo.addArgument(arg);
+  }
+}
+
 Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl(
-VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
+VarSpec->getSpecializedTemplate(), Def, TemplateArgInfo,
 VarSpec->getTemplateArgs().asArray(), VarSpec));
 if (Var) {
   llvm::PointerUnion(List->getNumTemplateArgs());
+  void *Mem = C.Allocate(size, alignof(ASTTemplateArgumentListInfo));
+  return new (Mem) ASTTemplateArgumentListInfo(List);
+}
+
 ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
 const TemplateArgumentListInfo ) {
   LAngleLoc = Info.getLAngleLoc();
@@ -628,6 +637,17 @@
 new ([i]) TemplateArgumentLoc(Info[i]);
 }
 
+ASTTemplateArgumentListInfo::ASTTemplateArgumentListInfo(
+const ASTTemplateArgumentListInfo *Info) {
+  LAngleLoc = Info->getLAngleLoc();
+  RAngleLoc = Info->getRAngleLoc();
+  NumTemplateArgs = Info->getNumTemplateArgs();
+
+  TemplateArgumentLoc *ArgBuffer = getTrailingObjects();
+  for (unsigned i = 0; i != NumTemplateArgs; ++i)
+new ([i]) TemplateArgumentLoc((*Info)[i]);
+}
+
 void ASTTemplateKWAndArgsInfo::initializeFrom(
 SourceLocation TemplateKWLoc, const TemplateArgumentListInfo ,
 TemplateArgumentLoc *OutArgArray) {
Index: clang/lib/AST/DeclTemplate.cpp
===
--- clang/lib/AST/DeclTemplate.cpp
+++ clang/lib/AST/DeclTemplate.cpp
@@ -1335,10 +1335,14 @@
 
 void VarTemplateSpecializationDecl::setTemplateArgsInfo(
 const TemplateArgumentListInfo ) {
-  

[PATCH] D125802: Fix std::has_unique_object_representations for _BitInt types with padding bits

2022-06-01 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

It looks like the leak is rooted at the allocation here:
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp#L3857

The VarTemplateSpecializationDecl is allocated using placement new which uses 
the AST structure for ownership: 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/lib/AST/DeclBase.cpp#L99

The problem is the TemplateArgumentListInfo inside 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/DeclTemplate.h#L2721
This object contains a vector which does not use placement new: 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/TemplateBase.h#L564

Apparently ASTTemplateArgumentListInfo 

 should be used instead 
https://github.com/llvm/llvm-project/blob/1a155ee7de3b62a2fabee86fb470a1554fadc54d/clang/include/clang/AST/TemplateBase.h#L575


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125802/new/

https://reviews.llvm.org/D125802

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125802: Fix std::has_unique_object_representations for _BitInt types with padding bits

2022-06-01 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: clang/test/SemaCXX/has_unique_object_reps_bitint.cpp:1
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify 
-std=c++17 -Wno-bitfield-width %s
+//  expected-no-diagnostics

This unit test appears to trigger a memory leak:

https://lab.llvm.org/buildbot/#/builders/5/builds/24359/steps/15/logs/stdio

```
FAIL: Clang :: SemaCXX/has_unique_object_reps_bitint.cpp (13615 of 66177)
 TEST 'Clang :: SemaCXX/has_unique_object_reps_bitint.cpp' 
FAILED 
Script:
--
: 'RUN: at line 1';   
/b/sanitizer-x86_64-linux-fast/build/llvm_build_asan/bin/clang -cc1 
-internal-isystem 
/b/sanitizer-x86_64-linux-fast/build/llvm_build_asan/lib/clang/15.0.0/include 
-nostdsysteminc -triple x86_64-unknown-unknown -fsyntax-only -verify -std=c++17 
-Wno-bitfield-width 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/test/SemaCXX/has_unique_object_reps_bitint.cpp
--
Exit Code: 1
Command Output (stderr):
--
=
==22556==ERROR: LeakSanitizer: detected memory leaks
Direct leak of 1632 byte(s) in 3 object(s) allocated from:
#0 0x55cc84e8d6be in malloc 
/b/s/w/ir/cache/builder/src/third_party/llvm/compiler-rt/lib/asan/asan_malloc_linux.cpp:69:3
#1 0x55cc8c0a417c in safe_malloc 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/Support/MemAlloc.h:26:18
#2 0x55cc8c0a417c in llvm::SmallVectorBase::grow_pod(void*, 
unsigned long, unsigned long) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Support/SmallVector.cpp:126:15
#3 0x55cc94342e34 in grow_pod 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/ADT/SmallVector.h:120:11
#4 0x55cc94342e34 in grow 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/ADT/SmallVector.h:506:41
#5 0x55cc94342e34 in 
reserveForParamAndGetAddressImpl > 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/ADT/SmallVector.h:225:11
#6 0x55cc94342e34 in reserveForParamAndGetAddress 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/ADT/SmallVector.h:511:12
#7 0x55cc94342e34 in push_back 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/ADT/SmallVector.h:543:23
#8 0x55cc94342e34 in addArgument 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/include/clang/AST/TemplateBase.h:604:15
#9 0x55cc94342e34 in 
clang::VarTemplateSpecializationDecl::setTemplateArgsInfo(clang::TemplateArgumentListInfo
 const&) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/AST/DeclTemplate.cpp:1341:22
#10 0x55cc937d5b66 in 
clang::TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(clang::VarTemplateDecl*,
 clang::VarDecl*, clang::TemplateArgumentListInfo const&, 
llvm::ArrayRef, clang::VarTemplateSpecializationDecl*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp:3860:8
#11 0x55cc937e1dd4 in 
clang::Sema::BuildVarTemplateInstantiation(clang::VarTemplateDecl*, 
clang::VarDecl*, clang::TemplateArgumentList const&, 
clang::TemplateArgumentListInfo const&, 
llvm::SmallVectorImpl&, clang::SourceLocation, 
llvm::SmallVector*, 
clang::LocalInstantiationScope*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp:5135:20
#12 0x55cc933d777d in 
clang::Sema::CheckVarTemplateId(clang::VarTemplateDecl*, clang::SourceLocation, 
clang::SourceLocation, clang::TemplateArgumentListInfo const&) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaTemplate.cpp:4621:41
#13 0x55cc933daddc in CheckVarTemplateId 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaTemplate.cpp:4657:21
#14 0x55cc933daddc in clang::Sema::BuildTemplateIdExpr(clang::CXXScopeSpec 
const&, clang::SourceLocation, clang::LookupResult&, bool, 
clang::TemplateArgumentListInfo const*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaTemplate.cpp:4748:22
#15 0x55cc928ffef3 in clang::Sema::ActOnIdExpression(clang::Scope*, 
clang::CXXScopeSpec&, clang::SourceLocation, clang::UnqualifiedId&, bool, bool, 
clang::CorrectionCandidateCallback*, bool, clang::Token*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Sema/SemaExpr.cpp:2728:12
#16 0x55cc91d0e988 in 
clang::Parser::tryParseCXXIdExpression(clang::CXXScopeSpec&, bool, 
clang::Token&) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Parse/ParseExprCXX.cpp:610:17
#17 0x55cc91d124d9 in clang::Parser::ParseCXXIdExpression(bool) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Parse/ParseExprCXX.cpp:676:7
#18 0x55cc91cdb510 in 
clang::Parser::ParseCastExpression(clang::Parser::CastParseKind, bool, bool&, 
clang::Parser::TypeCastState, bool, bool*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Parse/ParseExpr.cpp:1626:11

[PATCH] D124594: [LegacyPM] Remove DataFlowSanitizerLegacyPass

2022-04-27 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

I know this is currently used in at least one other place:
https://github.com/tensorflow/tensorflow/blob/dde12a690476650573a0ef3f8f681271a2016224/tensorflow/compiler/xla/service/cpu/compiler_functor.cc#L111


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124594/new/

https://reviews.llvm.org/D124594

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119621: [SanitizerCoverage] Add instrumentation callbacks for FP cmp instructions

2022-03-29 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:2515-2540
 SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp, void) {}
 SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp1, void) {}
 SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp2, void) {}
 SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp4, void) {}
 SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp_fp2,
+ void) {}

This (including the existing code) 
[wouldn't](https://github.com/llvm/llvm-project/blob/8f66f1371981bda1af1ca43d505e1bc5836b3e36/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp#L2802
) work if 
[dfsan-track-origins](https://github.com/llvm/llvm-project/blob/8f66f1371981bda1af1ca43d505e1bc5836b3e36/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp#L235)=1.

I guess this is something no one has tried, so probably not important? I'm also 
unsure where dfsan + sancov is used?



Comment at: compiler-rt/lib/fuzzer/dataflow/DataFlowCallbacks.cpp:92
 
 } // extern "C"

Note that many of the lib/fuzzer/dataflow/ [tests were 
disabled](https://github.com/llvm/llvm-project/commit/070556237e29e8a804fbec1d416d431239384ab0),
 as no one was using this... and I think they are still disabled.



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119621/new/

https://reviews.llvm.org/D119621

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D118199: [AArch64] ACLE feature macro for Armv8.8-A MOPS

2022-02-08 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Hi,

This change appears to break on of the buildbots (clang-ppc64le-linux-lnt 
):
https://lab.llvm.org/buildbot/#/builders/105/builds/21233/steps/7/logs/FAIL__Clang__aarch64-mops_c

   TEST 'Clang :: CodeGen/aarch64-mops.c' FAILED 

  Script:
  --
  : 'RUN: at line 3';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang -cc1 
-internal-isystem 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/lib/clang/15.0.0/include
 -nostdsysteminc -triple aarch64-arm-unknown-eabi -target-feature +mops 
-target-feature +mte -S -emit-llvm -o - 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  | 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/FileCheck 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 4';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang -cc1 
-internal-isystem 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/lib/clang/15.0.0/include
 -nostdsysteminc -triple aarch64-arm-unknown-eabi -verify 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 5';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv8.7-a+mops+memtag  -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o - | 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/FileCheck 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 6';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv8.8-a+memtag   -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o - | 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/FileCheck 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 7';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv9.2-a+mops+memtag  -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o - | 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/FileCheck 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 8';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv9.3-a+memtag   -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o - | 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/FileCheck 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
  : 'RUN: at line 9';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv8.7-a  -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  : 'RUN: at line 10';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv8.7-a+mops -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  : 'RUN: at line 11';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv8.8-a  -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  : 'RUN: at line 12';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv9.2-a  -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  : 'RUN: at line 13';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv9.2-a+mops -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  : 'RUN: at line 14';   
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/stage1/bin/clang 
-target aarch64-arm-none-eabi -march=armv9.3-a  -Xclang -verify -S 
-emit-llvm 
/home/buildbots/ppc64le-clang-lnt-test/clang-ppc64le-lnt/llvm/clang/test/CodeGen/aarch64-mops.c
 -o -
  --
  Exit Code: 2
  Command Output (stderr):
  --
  In file included from 

[PATCH] D117177: [NFC][DFSan] Update DataFlowSanitizer user docs for -dfsan-conditional-callbacks, added in https://reviews.llvm.org/D116207

2022-01-13 Thread Andrew via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG529f098789d3: [NFC][DFSan] Update DataFlowSanitizer user 
docs for -dfsan-conditional… (authored by browneee).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117177/new/

https://reviews.llvm.org/D117177

Files:
  clang/docs/DataFlowSanitizer.rst


Index: clang/docs/DataFlowSanitizer.rst
===
--- clang/docs/DataFlowSanitizer.rst
+++ clang/docs/DataFlowSanitizer.rst
@@ -214,6 +214,25 @@
   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
 
+* ``-dfsan-conditional-callbacks`` -- An experimental feature that inserts
+  callbacks for control flow conditional expressions.
+  This can be used to find where tainted values can control execution.
+
+  In addition to this compilation flag, a callback handler must be registered
+  using ``dfsan_set_conditional_callback(my_callback);``, where my_callback is
+  a function with a signature matching
+  ``void my_callback(dfsan_label l, dfsan_origin o);``.
+  This signature is the same when origin tracking is disabled - in this case
+  the dfsan_origin passed in it will always be 0.
+
+  The callback will only be called when a tainted value reaches a conditional
+  expression for control flow (such as an if's condition).
+  The callback will be skipped for conditional expressions inside signal
+  handlers, as this is prone to deadlock. Tainted values used in conditional
+  expressions inside signal handlers will instead be aggregated via bitwise
+  or, and can be accessed using
+  ``dfsan_label dfsan_get_labels_in_signal_conditional();``.
+
 * ``-dfsan-track-origins`` -- Controls how to track origins. When its value is
   0, the runtime does not track origins. When its value is 1, the runtime 
tracks
   origins at memory store operations. When its value is 2, the runtime tracks


Index: clang/docs/DataFlowSanitizer.rst
===
--- clang/docs/DataFlowSanitizer.rst
+++ clang/docs/DataFlowSanitizer.rst
@@ -214,6 +214,25 @@
   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
 
+* ``-dfsan-conditional-callbacks`` -- An experimental feature that inserts
+  callbacks for control flow conditional expressions.
+  This can be used to find where tainted values can control execution.
+
+  In addition to this compilation flag, a callback handler must be registered
+  using ``dfsan_set_conditional_callback(my_callback);``, where my_callback is
+  a function with a signature matching
+  ``void my_callback(dfsan_label l, dfsan_origin o);``.
+  This signature is the same when origin tracking is disabled - in this case
+  the dfsan_origin passed in it will always be 0.
+
+  The callback will only be called when a tainted value reaches a conditional
+  expression for control flow (such as an if's condition).
+  The callback will be skipped for conditional expressions inside signal
+  handlers, as this is prone to deadlock. Tainted values used in conditional
+  expressions inside signal handlers will instead be aggregated via bitwise
+  or, and can be accessed using
+  ``dfsan_label dfsan_get_labels_in_signal_conditional();``.
+
 * ``-dfsan-track-origins`` -- Controls how to track origins. When its value is
   0, the runtime does not track origins. When its value is 1, the runtime tracks
   origins at memory store operations. When its value is 2, the runtime tracks
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117177: [NFC][DFSan] Update DataFlowSanitizer user docs for -dfsan-conditional-callbacks, added in https://reviews.llvm.org/D116207

2022-01-12 Thread Andrew via Phabricator via cfe-commits
browneee created this revision.
browneee added a reviewer: morehouse.
browneee requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D117177

Files:
  clang/docs/DataFlowSanitizer.rst


Index: clang/docs/DataFlowSanitizer.rst
===
--- clang/docs/DataFlowSanitizer.rst
+++ clang/docs/DataFlowSanitizer.rst
@@ -214,6 +214,25 @@
   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
 
+* ``-dfsan-conditional-callbacks`` -- An experimental feature that inserts
+  callbacks for control flow conditional expressions.
+  This can be used to find where tainted values can control execution.
+
+  In addition to this compilation flag, a callback handler must be registered
+  using ``dfsan_set_conditional_callback(my_callback);``, where my_callback is
+  a function with a signature matching
+  ``void my_callback(dfsan_label l, dfsan_origin o);``.
+  This signature is the same when origin tracking is disabled - in this case
+  the dfsan_origin passed in it will always be 0.
+
+  The callback will only be called when a tainted value reaches a conditional
+  expression for control flow (such as an if's condition).
+  The callback will be skipped for conditional expressions inside signal
+  handlers, as this is prone to deadlock. Tainted values used in conditional
+  expressions inside signal handlers will instead be aggregated via bitwise
+  or, and can be accessed using
+  ``dfsan_label dfsan_get_labels_in_signal_conditional();``.
+
 * ``-dfsan-track-origins`` -- Controls how to track origins. When its value is
   0, the runtime does not track origins. When its value is 1, the runtime 
tracks
   origins at memory store operations. When its value is 2, the runtime tracks


Index: clang/docs/DataFlowSanitizer.rst
===
--- clang/docs/DataFlowSanitizer.rst
+++ clang/docs/DataFlowSanitizer.rst
@@ -214,6 +214,25 @@
   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
 
+* ``-dfsan-conditional-callbacks`` -- An experimental feature that inserts
+  callbacks for control flow conditional expressions.
+  This can be used to find where tainted values can control execution.
+
+  In addition to this compilation flag, a callback handler must be registered
+  using ``dfsan_set_conditional_callback(my_callback);``, where my_callback is
+  a function with a signature matching
+  ``void my_callback(dfsan_label l, dfsan_origin o);``.
+  This signature is the same when origin tracking is disabled - in this case
+  the dfsan_origin passed in it will always be 0.
+
+  The callback will only be called when a tainted value reaches a conditional
+  expression for control flow (such as an if's condition).
+  The callback will be skipped for conditional expressions inside signal
+  handlers, as this is prone to deadlock. Tainted values used in conditional
+  expressions inside signal handlers will instead be aggregated via bitwise
+  or, and can be accessed using
+  ``dfsan_label dfsan_get_labels_in_signal_conditional();``.
+
 * ``-dfsan-track-origins`` -- Controls how to track origins. When its value is
   0, the runtime does not track origins. When its value is 1, the runtime tracks
   origins at memory store operations. When its value is 2, the runtime tracks
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103745: [dfsan] Add full fast8 support

2021-10-28 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Sorry, I was thinking/describing fast-16 to fast-8.

The same reasons apply for legacy mode (2^16 labels) to fast-8.

- simplify code (smaller codesize overhead)
- share more code and shadow/origin memory layout with MSan (which also has a 
1:1 shadow)
- reduce memory overhead

In this case it also reduces runtime overhead.

Our experience was legacy mode did not scale to large programs. The choice we 
saw was do multiple runs with fast-8, or be unable to run with legacy.
Using fast-8 we can successfully run dfsan with large programs.

Having said this, we also fixed many other issues (e.g. memory leaks) to make 
fast-8 scale. So our experience comparing legacy and fast-8 mode is not quite 
fair.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103745/new/

https://reviews.llvm.org/D103745

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103745: [dfsan] Add full fast8 support

2021-10-28 Thread Andrew via Phabricator via cfe-commits
browneee added a subscriber: kcc.
browneee added a comment.

In D103745#3094831 , @twoh wrote:

> @gbalats @stephan.yichao.zhao Hello sorry for the late comment but I wonder 
> what was the reason behind this change (changing taint label representation 
> from 16-bit to 8-bit-fast only). Do we have any discussion thread from 
> llvm-dev regarding this? Thanks!

Hi twoh,

The reasons are:

- simplify code (smaller codesize overhead)
- share more code and shadow/origin memory layout with MSan (which also has a 
1:1 shadow)
- reduce memory overhead

The only disadvantage is you need 2x executions to cover the same number of 
taint labels, but we already need do multiple executions.

There was no discussion thread, but we discussed with @kcc internally.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103745/new/

https://reviews.llvm.org/D103745

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109847: [DFSan] Add force_zero_label abilist option to DFSan. This can be used as a work-around for overtainting.

2021-09-17 Thread Andrew via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc533b88a6dc9: [DFSan] Add force_zero_label abilist option to 
DFSan. This can be used as a… (authored by browneee).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109847/new/

https://reviews.llvm.org/D109847

Files:
  clang/docs/DataFlowSanitizer.rst
  compiler-rt/test/dfsan/Inputs/flags_abilist.txt
  compiler-rt/test/dfsan/force_zero.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
  llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
===
--- /dev/null
+++ llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s -DSHADOW_XOR_MASK=87960930222080
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i32 @function_to_force_zero(i32 %0, i32* %1) {
+  ; CHECK-LABEL: define i32 @function_to_force_zero.dfsan
+  ; CHECK: %[[#SHADOW_XOR:]] = xor i64 {{.*}}, [[SHADOW_XOR_MASK]]
+  ; CHECK: %[[#SHADOW_PTR:]] = inttoptr i64 %[[#SHADOW_XOR]] to i8*
+  ; CHECK: %[[#SHADOW_BITCAST:]] = bitcast i8* %[[#SHADOW_PTR]] to i32*
+  ; CHECK: store i32 0, i32* %[[#SHADOW_BITCAST]]
+  ; CHECK: store i32 %{{.*}}
+  store i32 %0, i32* %1, align 4
+  ; CHECK: store i8 0, {{.*}}@__dfsan_retval_tls
+  ; CHECK: ret i32
+  ret i32 %0
+}
Index: llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
===
--- llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
+++ llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
@@ -8,3 +8,5 @@
 fun:custom*=custom
 
 fun:uninstrumented*=uninstrumented
+
+fun:function_to_force_zero=force_zero_labels
Index: llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -144,8 +144,17 @@
 // to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
 // additional annotations for those functions, a call to one of those functions
 // will produce a warning message, as the labelling behaviour of the function is
-// unknown.  The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// will make all shadow and return values set zero labels.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the unistrumented wrapper kinds.
 static cl::list ClABIListFiles(
 "dfsan-abilist",
 cl::desc("File listing native ABI functions and how the pass treats them"),
@@ -469,6 +478,7 @@
   getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
   bool isInstrumented(const Function *F);
   bool isInstrumented(const GlobalAlias *GA);
+  bool isForceZeroLabels(const Function *F);
   FunctionType *getArgsFunctionType(FunctionType *T);
   FunctionType *getTrampolineFunctionType(FunctionType *T);
   TransformedFunction getCustomFunctionType(FunctionType *T);
@@ -541,6 +551,7 @@
   DominatorTree DT;
   DataFlowSanitizer::InstrumentedABI IA;
   bool IsNativeABI;
+  bool IsForceZeroLabels;
   AllocaInst *LabelReturnAlloca = nullptr;
   AllocaInst *OriginReturnAlloca = nullptr;
   DenseMap ValShadowMap;
@@ -571,8 +582,10 @@
   DenseMap CachedCollapsedShadows;
   DenseMap> ShadowElements;
 
-  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI)
-  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI,
+bool IsForceZeroLabels)
+  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI),
+IsForceZeroLabels(IsForceZeroLabels) {
 DT.recalculate(*F);
   }
 
@@ -1107,6 +1120,10 @@
   return !ABIList.isIn(*GA, "uninstrumented");
 }
 
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+  return ABIList.isIn(*F, "force_zero_labels");
+}
+
 DataFlowSanitizer::InstrumentedABI 

[PATCH] D109847: [DFSan] Add force_zero_label abilist option to DFSan. This can be used as a work-around for overtainting.

2021-09-17 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 373274.
browneee marked 3 inline comments as done.
browneee added a comment.

Update comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109847/new/

https://reviews.llvm.org/D109847

Files:
  clang/docs/DataFlowSanitizer.rst
  compiler-rt/test/dfsan/Inputs/flags_abilist.txt
  compiler-rt/test/dfsan/force_zero.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
  llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
===
--- /dev/null
+++ llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s -DSHADOW_XOR_MASK=87960930222080
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i32 @function_to_force_zero(i32 %0, i32* %1) {
+  ; CHECK-LABEL: define i32 @function_to_force_zero.dfsan
+  ; CHECK: %[[#SHADOW_XOR:]] = xor i64 {{.*}}, [[SHADOW_XOR_MASK]]
+  ; CHECK: %[[#SHADOW_PTR:]] = inttoptr i64 %[[#SHADOW_XOR]] to i8*
+  ; CHECK: %[[#SHADOW_BITCAST:]] = bitcast i8* %[[#SHADOW_PTR]] to i32*
+  ; CHECK: store i32 0, i32* %[[#SHADOW_BITCAST]]
+  ; CHECK: store i32 %{{.*}}
+  store i32 %0, i32* %1, align 4
+  ; CHECK: store i8 0, {{.*}}@__dfsan_retval_tls
+  ; CHECK: ret i32
+  ret i32 %0
+}
Index: llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
===
--- llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
+++ llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
@@ -8,3 +8,5 @@
 fun:custom*=custom
 
 fun:uninstrumented*=uninstrumented
+
+fun:function_to_force_zero=force_zero_labels
Index: llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -144,8 +144,17 @@
 // to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
 // additional annotations for those functions, a call to one of those functions
 // will produce a warning message, as the labelling behaviour of the function is
-// unknown.  The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// will make all shadow and return values set zero labels.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the unistrumented wrapper kinds.
 static cl::list ClABIListFiles(
 "dfsan-abilist",
 cl::desc("File listing native ABI functions and how the pass treats them"),
@@ -469,6 +478,7 @@
   getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
   bool isInstrumented(const Function *F);
   bool isInstrumented(const GlobalAlias *GA);
+  bool isForceZeroLabels(const Function *F);
   FunctionType *getArgsFunctionType(FunctionType *T);
   FunctionType *getTrampolineFunctionType(FunctionType *T);
   TransformedFunction getCustomFunctionType(FunctionType *T);
@@ -541,6 +551,7 @@
   DominatorTree DT;
   DataFlowSanitizer::InstrumentedABI IA;
   bool IsNativeABI;
+  bool IsForceZeroLabels;
   AllocaInst *LabelReturnAlloca = nullptr;
   AllocaInst *OriginReturnAlloca = nullptr;
   DenseMap ValShadowMap;
@@ -571,8 +582,10 @@
   DenseMap CachedCollapsedShadows;
   DenseMap> ShadowElements;
 
-  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI)
-  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI,
+bool IsForceZeroLabels)
+  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI),
+IsForceZeroLabels(IsForceZeroLabels) {
 DT.recalculate(*F);
   }
 
@@ -1107,6 +1120,10 @@
   return !ABIList.isIn(*GA, "uninstrumented");
 }
 
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+  return ABIList.isIn(*F, "force_zero_labels");
+}
+
 DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
   return ClArgsABI ? IA_Args : IA_TLS;
 }
@@ -1197,7 +1214,8 @@
 
 // F is called by a 

[PATCH] D109847: [DFSan] Add force_zero_label abilist option to DFSan. This can be used as a work-around for overtainting.

2021-09-17 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 373271.
browneee added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109847/new/

https://reviews.llvm.org/D109847

Files:
  clang/docs/DataFlowSanitizer.rst
  compiler-rt/test/dfsan/Inputs/flags_abilist.txt
  compiler-rt/test/dfsan/force_zero.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
  llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
===
--- /dev/null
+++ llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s -DSHADOW_XOR_MASK=87960930222080
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i32 @function_to_force_zero(i32 %0, i32* %1) {
+  ; CHECK-LABEL: define i32 @function_to_force_zero.dfsan
+  ; CHECK: %[[#SHADOW_XOR:]] = xor i64 {{.*}}, [[SHADOW_XOR_MASK]]
+  ; CHECK: %[[#SHADOW_PTR:]] = inttoptr i64 %[[#SHADOW_XOR]] to i8*
+  ; CHECK: %[[#SHADOW_BITCAST:]] = bitcast i8* %[[#SHADOW_PTR]] to i32*
+  ; CHECK: store i32 0, i32* %[[#SHADOW_BITCAST]]
+  ; CHECK: store i32 %{{.*}}
+  store i32 %0, i32* %1, align 4
+  ; CHECK: store i8 0, {{.*}}@__dfsan_retval_tls
+  ; CHECK: ret i32
+  ret i32 %0
+}
Index: llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
===
--- llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
+++ llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
@@ -8,3 +8,5 @@
 fun:custom*=custom
 
 fun:uninstrumented*=uninstrumented
+
+fun:function_to_force_zero=force_zero_labels
Index: llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -144,8 +144,17 @@
 // to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
 // additional annotations for those functions, a call to one of those functions
 // will produce a warning message, as the labelling behaviour of the function is
-// unknown.  The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function with that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// will make all shadow stores and label for return values set zero labels.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the unistrumented wrapper kinds.
 static cl::list ClABIListFiles(
 "dfsan-abilist",
 cl::desc("File listing native ABI functions and how the pass treats them"),
@@ -469,6 +478,7 @@
   getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
   bool isInstrumented(const Function *F);
   bool isInstrumented(const GlobalAlias *GA);
+  bool isForceZeroLabels(const Function *F);
   FunctionType *getArgsFunctionType(FunctionType *T);
   FunctionType *getTrampolineFunctionType(FunctionType *T);
   TransformedFunction getCustomFunctionType(FunctionType *T);
@@ -541,6 +551,7 @@
   DominatorTree DT;
   DataFlowSanitizer::InstrumentedABI IA;
   bool IsNativeABI;
+  bool IsForceZeroLabels;
   AllocaInst *LabelReturnAlloca = nullptr;
   AllocaInst *OriginReturnAlloca = nullptr;
   DenseMap ValShadowMap;
@@ -571,8 +582,10 @@
   DenseMap CachedCollapsedShadows;
   DenseMap> ShadowElements;
 
-  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI)
-  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI,
+bool IsForceZeroLabels)
+  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI),
+IsForceZeroLabels(IsForceZeroLabels) {
 DT.recalculate(*F);
   }
 
@@ -1107,6 +1120,10 @@
   return !ABIList.isIn(*GA, "uninstrumented");
 }
 
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+  return ABIList.isIn(*F, "force_zero_labels");
+}
+
 DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
   return ClArgsABI ? IA_Args : IA_TLS;
 }
@@ -1197,7 +1214,8 @@
 
 // F is called by a wrapped custom function with 

[PATCH] D109847: [DFSan] Add force_zero_label abilist option to DFSan. This can be used as a work-around for overtainting.

2021-09-16 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 373065.
browneee added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Docs


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109847/new/

https://reviews.llvm.org/D109847

Files:
  clang/docs/DataFlowSanitizer.rst
  compiler-rt/test/dfsan/Inputs/flags_abilist.txt
  compiler-rt/test/dfsan/force_zero.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
  llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
===
--- /dev/null
+++ llvm/test/Instrumentation/DataFlowSanitizer/force_zero.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s -DSHADOW_XOR_MASK=87960930222080
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define i32 @function_to_force_zero(i32 %0, i32* %1) {
+  ; CHECK-LABEL: define i32 @function_to_force_zero.dfsan
+  ; CHECK: %[[#SHADOW_XOR:]] = xor i64 {{.*}}, [[SHADOW_XOR_MASK]]
+  ; CHECK: %[[#SHADOW_PTR:]] = inttoptr i64 %[[#SHADOW_XOR]] to i8*
+  ; CHECK: %[[#SHADOW_BITCAST:]] = bitcast i8* %[[#SHADOW_PTR]] to i32*
+  ; CHECK: store i32 0, i32* %[[#SHADOW_BITCAST]]
+  ; CHECK: store i32 %{{.*}}
+  store i32 %0, i32* %1, align 4
+  ; CHECK: store i8 0, {{.*}}@__dfsan_retval_tls
+  ; CHECK: ret i32
+  ret i32 %0
+}
Index: llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
===
--- llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
+++ llvm/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
@@ -8,3 +8,5 @@
 fun:custom*=custom
 
 fun:uninstrumented*=uninstrumented
+
+fun:function_to_force_zero=force_zero_labels
Index: llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
===
--- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -144,8 +144,17 @@
 // to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
 // additional annotations for those functions, a call to one of those functions
 // will produce a warning message, as the labelling behaviour of the function is
-// unknown.  The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function with that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// will make all shadow stores and label for return values set zero labels.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the unistrumented wrapper kinds.
 static cl::list ClABIListFiles(
 "dfsan-abilist",
 cl::desc("File listing native ABI functions and how the pass treats them"),
@@ -469,6 +478,7 @@
   getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
   bool isInstrumented(const Function *F);
   bool isInstrumented(const GlobalAlias *GA);
+  bool isForceZeroLabels(const Function *F);
   FunctionType *getArgsFunctionType(FunctionType *T);
   FunctionType *getTrampolineFunctionType(FunctionType *T);
   TransformedFunction getCustomFunctionType(FunctionType *T);
@@ -541,6 +551,7 @@
   DominatorTree DT;
   DataFlowSanitizer::InstrumentedABI IA;
   bool IsNativeABI;
+  bool IsForceZeroLabels;
   AllocaInst *LabelReturnAlloca = nullptr;
   AllocaInst *OriginReturnAlloca = nullptr;
   DenseMap ValShadowMap;
@@ -571,8 +582,10 @@
   DenseMap CachedCollapsedShadows;
   DenseMap> ShadowElements;
 
-  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI)
-  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+  DFSanFunction(DataFlowSanitizer , Function *F, bool IsNativeABI,
+bool IsForceZeroLabels)
+  : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI),
+IsForceZeroLabels(IsForceZeroLabels) {
 DT.recalculate(*F);
   }
 
@@ -1107,6 +1120,10 @@
   return !ABIList.isIn(*GA, "uninstrumented");
 }
 
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+  return ABIList.isIn(*F, "force_zero_labels");
+}
+
 DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
   return ClArgsABI ? IA_Args : IA_TLS;
 }
@@ -1197,7 

[PATCH] D104896: [DFSan] Change shadow and origin memory layouts to match MSan.

2021-06-25 Thread Andrew via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG45f6d5522f8d: [DFSan] Change shadow and origin memory 
layouts to match MSan. (authored by browneee).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104896/new/

https://reviews.llvm.org/D104896

Files:
  clang/docs/DataFlowSanitizerDesign.rst
  compiler-rt/lib/dfsan/dfsan.cpp
  compiler-rt/lib/dfsan/dfsan.h
  compiler-rt/lib/dfsan/dfsan_allocator.cpp
  compiler-rt/lib/dfsan/dfsan_platform.h
  compiler-rt/test/dfsan/origin_invalid.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/atomics.ll
  llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
  llvm/test/Instrumentation/DataFlowSanitizer/load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
  llvm/test/Instrumentation/DataFlowSanitizer/store.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -22,7 +22,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i8* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -39,7 +39,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i16* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -58,7 +58,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i32* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -81,7 +81,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i64* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-COUNT-8: insertelement {{.*}} i[[#SBITS]]
   ; CHECK-NEXT:bitcast i[[#SBITS]]* {{.*}} <8 x i[[#SBITS]]>*
Index: llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
@@ -52,9 +52,9 @@
   ; CHECK-NEXT:   %[[#AO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
   ; CHECK-NEXT:   %[[#AS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN]]
   ; CHECK:%[[#INTP:]] = ptrtoint i16* %p to i64
-  ; CHECK-NEXT:   %[[#SHADOW_ADDR:]] = and i64 %[[#INTP]], [[#%.10d,MASK:]]
-  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#INTP+1]], [[#%.10d,ORIGIN_MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#%.10d,MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_OFFSET]] to i[[#SBITS]]*
+  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#SHADOW_OFFSET]], [[#%.10d,ORIGIN_BASE:]]
   ; CHECK-NEXT:   %[[#ORIGIN_ADDR:]] = and i64 %[[#ORIGIN_OFFSET]], -4
   ; CHECK-NEXT:   %[[#ORIGIN_PTR:]] = inttoptr i64 %[[#ORIGIN_ADDR]] to i32*
   ; CHECK:%_dfscmp = icmp ne i[[#SBITS]] %[[#AS]], 0
Index: llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
@@ -35,9 +35,9 @@
 define i16* @load_escaped_alloca() {
   ; CHECK-LABEL:  @load_escaped_alloca.dfsan
   ; CHECK:%[[#INTP:]] = ptrtoint i16* %p to i64
-  ; CHECK-NEXT:   %[[#SHADOW_ADDR:]] = and i64 %[[#INTP]], [[#%.10d,MASK:]]
-  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#INTP+1]], [[#%.10d,ORIGIN_MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#%.10d,MASK:]]
+  ; 

[PATCH] D104896: [DFSan] Change shadow and origin memory layouts to match MSan.

2021-06-25 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 354558.
browneee marked 8 inline comments as done.
browneee added a comment.

Addressed comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104896/new/

https://reviews.llvm.org/D104896

Files:
  clang/docs/DataFlowSanitizerDesign.rst
  compiler-rt/lib/dfsan/dfsan.cpp
  compiler-rt/lib/dfsan/dfsan.h
  compiler-rt/lib/dfsan/dfsan_allocator.cpp
  compiler-rt/lib/dfsan/dfsan_platform.h
  compiler-rt/test/dfsan/origin_invalid.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/atomics.ll
  llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
  llvm/test/Instrumentation/DataFlowSanitizer/load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
  llvm/test/Instrumentation/DataFlowSanitizer/store.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -22,7 +22,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i8* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -39,7 +39,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i16* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -58,7 +58,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i32* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -81,7 +81,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i64* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-COUNT-8: insertelement {{.*}} i[[#SBITS]]
   ; CHECK-NEXT:bitcast i[[#SBITS]]* {{.*}} <8 x i[[#SBITS]]>*
Index: llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
@@ -52,9 +52,9 @@
   ; CHECK-NEXT:   %[[#AO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
   ; CHECK-NEXT:   %[[#AS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN]]
   ; CHECK:%[[#INTP:]] = ptrtoint i16* %p to i64
-  ; CHECK-NEXT:   %[[#SHADOW_ADDR:]] = and i64 %[[#INTP]], [[#%.10d,MASK:]]
-  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#INTP+1]], [[#%.10d,ORIGIN_MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#%.10d,MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_OFFSET]] to i[[#SBITS]]*
+  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#SHADOW_OFFSET]], [[#%.10d,ORIGIN_BASE:]]
   ; CHECK-NEXT:   %[[#ORIGIN_ADDR:]] = and i64 %[[#ORIGIN_OFFSET]], -4
   ; CHECK-NEXT:   %[[#ORIGIN_PTR:]] = inttoptr i64 %[[#ORIGIN_ADDR]] to i32*
   ; CHECK:%_dfscmp = icmp ne i[[#SBITS]] %[[#AS]], 0
Index: llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
@@ -35,9 +35,9 @@
 define i16* @load_escaped_alloca() {
   ; CHECK-LABEL:  @load_escaped_alloca.dfsan
   ; CHECK:%[[#INTP:]] = ptrtoint i16* %p to i64
-  ; CHECK-NEXT:   %[[#SHADOW_ADDR:]] = and i64 %[[#INTP]], [[#%.10d,MASK:]]
-  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 %[[#SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK-NEXT:   %[[#ORIGIN_OFFSET:]] = add i64 %[[#INTP+1]], [[#%.10d,ORIGIN_MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#%.10d,MASK:]]
+  ; CHECK-NEXT:   %[[#SHADOW_PTR0:]] = inttoptr i64 

[PATCH] D104896: [DFSan] Change shadow and origin memory layouts to match MSan.

2021-06-24 Thread Andrew via Phabricator via cfe-commits
browneee created this revision.
browneee added reviewers: stephan.yichao.zhao, gbalats.
Herald added subscribers: pengfei, hiraditya.
browneee requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Previously on x86_64:

  ++ 0x8000 (top of memory)
  | application memory |
  ++ 0x70008000 (kAppAddr)
  ||
  |   unused   |
  ||
  ++ 0x3000 (kUnusedAddr)
  |   origin   |
  ++ 0x20008000 (kOriginAddr)
  |   unused   |
  ++ 0x2000
  |   shadow memory|
  ++ 0x10008000 (kShadowAddr)
  |   unused   |
  ++ 0x0001
  | reserved by kernel |
  ++ 0x
  
  MEM_TO_SHADOW(mem) = mem & ~0x6000
  SHADOW_TO_ORIGIN(shadow) = kOriginAddr - kShadowAddr + shadow

Now for x86_64:

  ++ 0x8000 (top of memory)
  |application 3   |
  ++ 0x7000
  |  invalid   |
  ++ 0x6100
  |  origin 1  |
  ++ 0x6000
  |application 2   |
  ++ 0x5100
  |  shadow 1  |
  ++ 0x5000
  |  invalid   |
  ++ 0x4000
  |  origin 3  |
  ++ 0x3000
  |  shadow 3  |
  ++ 0x2000
  |  origin 2  |
  ++ 0x1100
  |  invalid   |
  ++ 0x1000
  |  shadow 2  |
  ++ 0x0100
  |application 1   |
  ++ 0x
  
  MEM_TO_SHADOW(mem) = mem ^ 0x5000
  SHADOW_TO_ORIGIN(shadow) = shadow + 0x1000


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104896

Files:
  clang/docs/DataFlowSanitizerDesign.rst
  compiler-rt/lib/dfsan/dfsan.cpp
  compiler-rt/lib/dfsan/dfsan.h
  compiler-rt/lib/dfsan/dfsan_allocator.cpp
  compiler-rt/lib/dfsan/dfsan_platform.h
  compiler-rt/test/dfsan/origin_invalid.c
  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
  llvm/test/Instrumentation/DataFlowSanitizer/atomics.ll
  llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
  llvm/test/Instrumentation/DataFlowSanitizer/load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
  llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
  llvm/test/Instrumentation/DataFlowSanitizer/store.ll

Index: llvm/test/Instrumentation/DataFlowSanitizer/store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -22,7 +22,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i8* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -39,7 +39,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i16* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -58,7 +58,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i32* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-NEXT:getelementptr i[[#SBITS]], i[[#SBITS]]*
   ; CHECK-NEXT:store i[[#SBITS]]
@@ -81,7 +81,7 @@
   ; COMBINE_PTR_LABEL: load i[[#SBITS]], i[[#SBITS]]* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i[[#SBITS]]
   ; CHECK: ptrtoint i64* {{.*}} i64
-  ; CHECK-NEXT:and i64
+  ; CHECK-NEXT:xor i64
   ; CHECK-NEXT:inttoptr i64 {{.*}} i[[#SBITS]]*
   ; CHECK-COUNT-8: insertelement {{.*}} i[[#SBITS]]
   ; CHECK-NEXT:bitcast i[[#SBITS]]* {{.*}} <8 x i[[#SBITS]]>*
Index: llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
===
--- llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
+++ llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
@@ -52,9 +52,9 @@
   ; CHECK-NEXT:   %[[#AO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x 

[PATCH] D103745: [dfsan] Add full fast8 support

2021-06-07 Thread Andrew via Phabricator via cfe-commits
browneee accepted this revision.
browneee added inline comments.



Comment at: llvm/test/Instrumentation/DataFlowSanitizer/external_mask.ll:6
 define i32 @test(i32 %a, i32* nocapture readonly %b) #0 {
-; CHECK: @"dfs$test"
-; CHECK: %[[RV:.*]] load{{.*}}__dfsan_shadow_ptr_mask
-; CHECK: ptrtoint i32* {{.*}} to i64
-; CHECK: and {{.*}}%[[RV:.*]]
-; CHECK16: mul i64
+  ; CHECK: @"dfs$test"
+  ; CHECK: %[[RV:.*]] load{{.*}}__dfsan_shadow_ptr_mask

Fix whitespace



Comment at: llvm/test/Instrumentation/DataFlowSanitizer/load.ll:99
+  ; CHECK-NEXT:ret i64 %a
+  
   %a = load i64, i64* %p

Fix whitespace


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103745/new/

https://reviews.llvm.org/D103745

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-30 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

https://reviews.llvm.org/D79214


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-30 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

In D77621#2013546 , @nikic wrote:

> @browneee Looks like LLVM already defines `LLVM_PTR_SIZE` as a more portable 
> version of `__SIZEOF_POINTER__`.


I saw LLVM_PTR_SIZE, but its definition may be based on sizeof() 
,
 so I don't think it should be used in a preprocessor condition.

SIZE_MAX looks like a good option.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-30 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Thanks for the tips, MaskRay.

Yes, I expect this would fix that issue.

smeenai, SizeTypeMax() is intended to return size_t.

---

I see a couple of options for fixing the truncation warning on 32-bit platforms:

1. Add an explicit cast to remove the warning.
  - Disadvantage: the second instantiation still exists even though it is 
unused.

  static constexpr size_t SizeTypeMax() {
return static_cast(std::numeric_limits::max());
  }



2. Use a std::conditional to swap the type of one instantiation to avoid 
conflicts.

  In this case I'd probably swap back to using uintptr_t and disable the 
uint32_t on 32bit.
  - Disadvantage: the second instantiation still exists even though it is 
unused.

  // Will be unused when instantiated with char.
  // This is to avoid instantiation for uint32_t conflicting with uintptr_t on 
32-bit systems.
  template class llvm::SmallVectorBase::type>; 
  template class llvm::SmallVectorBase;  



3. Use preprocessor to disable one of the instantiations on 32-bit platforms.

  In this case I'd probably swap back to using uintptr_t and disable the 
uint32_t on 32bit.
  - Disadvantage: uses preprocessor
  - Disadvantage: potential for portability issues with different platforms 
lacking certain macros

  #if __SIZEOF_POINTER__ != 4  && !defined(_WIN32) && !defined(__ILP32)
  template class llvm::SmallVectorBase; 
  #endif
  template class llvm::SmallVectorBase;  

My order of preference would be 1, 2, 3.

Is there another solution I've missed? Thoughts on which is best? @dblaikie


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee marked an inline comment as done.
browneee added inline comments.



Comment at: llvm/include/llvm/ADT/SmallVector.h:19
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"

nikic wrote:
> Is this include still needed?
SmallVectorTemplateBase::grow() still remains in the 
header and uses report_bad_alloc_error().


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 260064.
browneee added a comment.

Change uintptr_t to uint64_t to ensure this does not instantiate the same 
template twice on platforms where uintptr_t is equivalent to uint32_t.

Also considered using the preprocessor to disable the uintptr_t instantiation, 
but chose to avoid preprocessor use.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -37,24 +37,30 @@
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-/// This function will report a fatal error if it cannot increase capacity.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
+
+// Note: Moving this function into the header may cause performance regression.
+template 
+void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
+   size_t TSize) {
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (MinCapacity > SizeTypeMax())
 report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (capacity() == SizeTypeMax())
 report_bad_alloc_error("SmallVector capacity unable to grow");
 
+  // In theory 2*capacity can overflow if the capacity is 64 bit, but the
+  // original capacity would never be large enough for this to be a problem.
   size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
 
   void *NewElts;
   if (BeginX == FirstEl) {
@@ -70,3 +76,6 @@
   this->BeginX = NewElts;
   this->Capacity = NewCapacity;
 }
+
+template class llvm::SmallVectorBase;
+template class llvm::SmallVectorBase;
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -70,9 +83,14 @@
   }
 };
 
+template 
+using SmallVectorSizeType =
+typename std::conditional= 8, uint64_t,
+  uint32_t>::type;
+
 /// Figure out the offset of the first element.
 template  struct SmallVectorAlignmentAndSize {
-  AlignedCharArrayUnion Base;
+  AlignedCharArrayUnion>> Base;
   AlignedCharArrayUnion FirstEl;
 };
 
@@ -80,7 +98,10 @@
 /// the type T is a POD. 

[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Reverted in 5cb4c3776a34d48e43d9118921d2191aee0e3d21 


Fails on plaforms where uintptr_t is the same type as uint32_t.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee closed this revision.
browneee added a comment.

Comitted: b5f0eae1dc3c09c020cdf9d07238dec9acdacf5f 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 260006.
browneee marked 4 inline comments as done.
browneee added a comment.

- Change SizeTypeMax to a static constexpr function.
- Fix comment typos.
- Add comment to alert others to possible performance loss if that function is 
moved to the header.

@nikic: Thank you for detecting, analyzing, and solving the performance 
regression!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -37,24 +37,30 @@
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-/// This function will report a fatal error if it cannot increase capacity.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
+
+// Note: Moving this function into the header may cause performance regression.
+template 
+void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
+   size_t TSize) {
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (MinCapacity > SizeTypeMax())
 report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (capacity() == SizeTypeMax())
 report_bad_alloc_error("SmallVector capacity unable to grow");
 
+  // In theory 2*capacity can overflow if the capacity is 64 bit, but the
+  // original capacity would never be large enough for this to be a problem.
   size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
 
   void *NewElts;
   if (BeginX == FirstEl) {
@@ -70,3 +76,6 @@
   this->BeginX = NewElts;
   this->Capacity = NewCapacity;
 }
+
+template class llvm::SmallVectorBase;
+template class llvm::SmallVectorBase;
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -70,9 +83,13 @@
   }
 };
 
+template 
+using SmallVectorSizeType =
+typename std::conditional::type;
+
 /// Figure out the offset of the first element.
 template  struct SmallVectorAlignmentAndSize {
-  AlignedCharArrayUnion Base;
+  AlignedCharArrayUnion>> Base;
   AlignedCharArrayUnion FirstEl;
 };
 
@@ -80,7 +97,10 @@
 /// the type T is a POD. The extra dummy 

[PATCH] D77621: ADT: SmallVector size/capacity use word-size integers when elements are small

2020-04-24 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 259949.
browneee added a comment.

Switch back to size and capacity type conditionally larger approach (appologies 
for the noise here).

Apply performance regression solutions from @nikic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -37,24 +37,29 @@
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-/// This function will report a fatal error if it cannot increase capacity.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
+
+template 
+void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
+   size_t TSize) {
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable if the when the capacity is 32 bit.
+  if (MinCapacity > SizeTypeMax)
 report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable if the when the capacity is 32 bit.
+  if (capacity() == SizeTypeMax)
 report_bad_alloc_error("SmallVector capacity unable to grow");
 
+  // In theory 2*capacity can overflow if the capacity is 64 bit, but the
+  // original capacity would never be large enough for this to be a problem.
   size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax);
 
   void *NewElts;
   if (BeginX == FirstEl) {
@@ -70,3 +75,6 @@
   this->BeginX = NewElts;
   this->Capacity = NewCapacity;
 }
+
+template class llvm::SmallVectorBase;
+template class llvm::SmallVectorBase;
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,21 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax = std::numeric_limits::max();
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -70,9 +81,15 @@
   }
 };
 
+template  const size_t SmallVectorBase::SizeTypeMax;
+
+template 
+using SmallVectorSizeType =
+typename std::conditional::type;
+
 /// Figure out the offset of the first element.
 template  struct SmallVectorAlignmentAndSize {
-  AlignedCharArrayUnion Base;
+  AlignedCharArrayUnion>> Base;
   AlignedCharArrayUnion FirstEl;
 };
 
@@ -80,7 +97,10 @@
 /// the type T is a POD. The extra dummy template argument is used by ArrayRef
 /// to avoid unnecessarily requiring T to be complete.
 template 
-class SmallVectorTemplateCommon : public SmallVectorBase {
+class 

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-23 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

I resubmitted the report_fatal_error checks again under D77601 


http://llvm-compile-time-tracker.com/compare.php?from=7375212172951d2fc283c81d03c1a8588c3280c6=a30e7ea88e75568feed020aedae73c52de35=max-rss
http://llvm-compile-time-tracker.com/compare.php?from=7375212172951d2fc283c81d03c1a8588c3280c6=a30e7ea88e75568feed020aedae73c52de35=instructions

Imo impact from this part is insignificant.

Other pieces I see as possibly impacting compile time are:

1. This correction to SmallVectorTemplateCommon::max_size().   But 
SizeTypeMax() is static constexpr, this seems like it could still be optimized 
to a constant.

  -  size_type max_size() const { return size_type(-1) / sizeof(T); }
  +  size_type max_size() const {
  +return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
  +  }



2. More function calls. They also appear fairly optimizable to me.

I may not have good insight into the actual optimization behavior here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-22 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 259480.
browneee added a comment.

Switch approach back to std::vector change.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang-tools-extra/clang-doc/Serialize.cpp
  clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/PrecompiledPreamble.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  clang/lib/Serialization/PCHContainerOperations.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ llvm/tools/llvm-cat/llvm-cat.cpp
@@ -54,7 +54,7 @@
   ExitOnError ExitOnErr("llvm-cat: 

[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-21 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

Thanks for the revert explanation and notes, nikic.

@dexonsmith what is your current thinking on going back to the original 
std::vector approach?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-17 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 258444.
browneee added a comment.

Rebase to latest HEAD.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -36,30 +36,6 @@
 static_assert(sizeof(SmallVector) ==
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
-
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +59,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a problem.
+size_t NewCapacity = 2 * capacity() + 1; // Always grow.
+NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
+
+void *NewElts;
+if (BeginX == FirstEl) {
+  NewElts = safe_malloc(NewCapacity * TSize);
+
+  // Copy the elements over.  No need to run 

[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-15 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 257861.
browneee added a comment.

Rebase to latest HEAD.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -36,30 +36,6 @@
 static_assert(sizeof(SmallVector) ==
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
-
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +59,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a problem.
+size_t NewCapacity = 2 * capacity() + 1; // Always grow.
+NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
+
+void *NewElts;
+if (BeginX == FirstEl) {
+  NewElts = safe_malloc(NewCapacity * TSize);
+
+  // Copy the elements over.  No need to run 

[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-13 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

GIT_COMMITTER_NAME=Andrew Browne
GIT_COMMITTER_EMAIL=brown...@google.com

This would be my second commit. I will request access next time - thanks 
@dexonsmith!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-13 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

I'm open to suggestions to resolve the clang tidy naming warnings. I would 
prefer to leave grow_pod the same, to minimize changes.

@dexonsmith I am not a committer, if the last changes looks good please submit 
for me. Thanks!




Comment at: llvm/include/llvm/ADT/SmallVector.h:52
+  // The maximum size depends on size_type used.
+  static constexpr size_t SizeMax() {
+return std::numeric_limits::max();

dexonsmith wrote:
> STL data structures have a name for this called `max_size()`.  Should we be 
> consistent with that?
Good question.

This brought my attention to the existing SmallVectorTemplateCommon::max_size() 
which also needed to be updated.
I'm going to name this new function SizeTypeMax to best describe what it 
provides, and leave it separate from max_size().



Comment at: llvm/include/llvm/ADT/SmallVector.h:179
+  using Base::empty;
+  using Base::size;
+

dexonsmith wrote:
> Optionally we could expose `max_size()` as well.
Not done. Updated existing max_size() instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-13 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 257130.
browneee marked 3 inline comments as done.
browneee added a comment.

Rename SizeMax() to SizeTypeMax(). Fix max_size().


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -36,30 +36,6 @@
 static_assert(sizeof(SmallVector) ==
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
-
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  /// The maximum value of the Size_T used.
+  static constexpr size_t SizeTypeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +59,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeTypeMax())
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a problem.
+size_t NewCapacity = 2 * capacity() + 1; // Always grow.
+NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
+
+void *NewElts;
+if (BeginX == FirstEl) {
+  NewElts = 

[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-13 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 257044.
browneee added a comment.

Changed SizeMax to static constexpr function.
Changed static asserts.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -36,30 +36,6 @@
 static_assert(sizeof(SmallVector) ==
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
-
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+static_assert(sizeof(SmallVector) ==
+  sizeof(void *) * 2 + sizeof(void *),
+  "1 byte elements have word-sized type for size and capacity");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  // The maximum size depends on size_type used.
+  static constexpr size_t SizeMax() {
+return std::numeric_limits::max();
+  }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +59,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeMax())
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeMax())
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a problem.
+size_t NewCapacity = 2 * capacity() + 1; // Always grow.
+NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeMax());
+
+void *NewElts;
+if (BeginX == FirstEl) {
+  NewElts = safe_malloc(NewCapacity * TSize);
+
+  // Copy 

[PATCH] D77621: ADT: SmallVector size & capacity use word-size integers when elements are small.

2020-04-11 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 256803.
browneee marked 3 inline comments as done.
browneee added a comment.

Address comments from dblaikie.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -37,29 +37,11 @@
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+// Check that SmallVector with 1 byte elements takes extra space due to using
+// uintptr_r instead of uint32_t for size and capacity.
+static_assert(sizeof(SmallVector) > sizeof(SmallVector) ||
+  sizeof(uintptr_t) == sizeof(uint32_t),
+  "1 byte elements should cause larger size and capacity type");
+static_assert(sizeof(SmallVector) ==
+  sizeof(SmallVector),
+  "larger elements should keep the same size and capacity type");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -16,10 +16,10 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/ErrorHandling.h"
 #include 
 #include 
 #include 
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -34,11 +35,21 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  Size_T Size = 0, Capacity;
+
+  // The maximum size depends on size_type used.
+  static constexpr size_t SizeMax = std::numeric_limits::max();
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +57,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeMax)
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeMax)
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a 

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-10 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 256621.
browneee added a comment.

Change to suggested approach: size and capacity type conditionally larger for 
small element types.

Also incorporate https://reviews.llvm.org/D77601


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  llvm/include/llvm/ADT/SmallVector.h
  llvm/lib/Support/SmallVector.cpp

Index: llvm/lib/Support/SmallVector.cpp
===
--- llvm/lib/Support/SmallVector.cpp
+++ llvm/lib/Support/SmallVector.cpp
@@ -37,29 +37,11 @@
   sizeof(unsigned) * 2 + sizeof(void *) * 2,
   "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-   size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
-report_bad_alloc_error("SmallVector capacity overflow during allocation");
-
-  size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-  std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
-
-  void *NewElts;
-  if (BeginX == FirstEl) {
-NewElts = safe_malloc(NewCapacity * TSize);
-
-// Copy the elements over.  No need to run dtors on PODs.
-memcpy(NewElts, this->BeginX, size() * TSize);
-  } else {
-// If this wasn't grown from the inline copy, grow the allocated space.
-NewElts = safe_realloc(this->BeginX, NewCapacity * TSize);
-  }
-
-  this->BeginX = NewElts;
-  this->Capacity = NewCapacity;
-}
+// Check that SmallVector with 1 byte elements takes extra space due to using
+// uintptr_r instead of uint32_t for size and capacity.
+static_assert(sizeof(SmallVector) > sizeof(SmallVector) ||
+  sizeof(uintptr_t) == sizeof(uint32_t),
+  "1 byte elements should cause larger size and capacity type");
+static_assert(sizeof(SmallVector) ==
+  sizeof(SmallVector),
+  "larger elements should keep the same size and capacity type");
Index: llvm/include/llvm/ADT/SmallVector.h
===
--- llvm/include/llvm/ADT/SmallVector.h
+++ llvm/include/llvm/ADT/SmallVector.h
@@ -34,11 +34,23 @@
 
 namespace llvm {
 
-/// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+/// This is all the stuff common to all SmallVectors.
+///
+/// The template parameter specifies the type which should be used to hold the
+/// Size and Capacity of the SmallVector, so it can be adjusted.
+/// Using 32 bit size is desirable to shink the size of the SmallVector.
+/// Using 64 bit size is desirable for cases like SmallVector, where a
+/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
+/// buffering bitcode output - which can exceed 4GB.
+template  class SmallVectorBase {
 protected:
+  typedef Size_T size_type;
+
   void *BeginX;
-  unsigned Size = 0, Capacity;
+  size_type Size = 0, Capacity;
+
+  // The maximum size depends on size_type used.
+  size_t SizeMax() { return size_type(-1ULL); }
 
   SmallVectorBase() = delete;
   SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -46,7 +58,39 @@
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
-  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
+  /// This function will report a fatal error if it cannot increase capacity.
+  void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize) {
+// Ensure we can fit the new capacity.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (MinCapacity > SizeMax())
+  report_bad_alloc_error("SmallVector capacity overflow during allocation");
+
+// Ensure we can meet the guarantee of space for at least one more element.
+// The above check alone will not catch the case where grow is called with a
+// default MinCapacity of 0, but the current capacity cannot be increased.
+// This is only going to be applicable if the when the capacity is 32 bit.
+if (capacity() == SizeMax())
+  report_bad_alloc_error("SmallVector capacity unable to grow");
+
+// In theory 2*capacity can overflow if the capacity is 64 bit, but the
+// original capacity would never be large enough for this to be a problem.
+size_t NewCapacity = 2 * capacity() + 1; // Always grow.
+NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeMax());
+
+void *NewElts;
+if (BeginX == FirstEl) {
+  NewElts = safe_malloc(NewCapacity * TSize);
+
+  // Copy the elements over.  No need to run dtors on PODs.
+  memcpy(NewElts, this->BeginX, size() 

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee added a comment.

In D77621#1968437 , @dexonsmith wrote:

> This is thanks to a commit of mine that shaved a word off of `SmallVector`.  
> Some options to consider:
>
> 1. Revert to word-size integers (`size_t`? `uintptr_t`?) for Size and 
> Capacity for small-enough types.  Could be just if `sizeof(T)==1`.  Or maybe 
> just for `char` and `unsigned char`.
> 2. Revert my patch entirely and go back to words (these used to be `void*`).
> 3. (Your patch, stop using `SmallVector`.)
>
>   I think I would prefer some variation of (1) over (3).


Hi Duncan, thanks for raising these alternatives.

Links to your prior commit for context: Review 
, Commit 


I agree any of these options would solve the issue I'm experiencing.

Option 1:

- I think SmallVectorBase would need to become templated.
- The size related code would need support to two sets of edge cases.
- The varying capacity may be surprising, and adds another variation to both 
both small mode and heap mode.

Option 3:

- This patch is somewhat widespread. A more localized fix may be desirable.
- Some inconvenience of an API change for downstream.

Do we want to increase the complexity of SmallVector somewhat? or do we want to 
keep the limit and affirm SmallVector is for small things?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee added inline comments.



Comment at: llvm/include/llvm/Bitstream/BitstreamWriter.h:19
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"

RKSimon wrote:
> Can this be dropped?
It is still used to construct records (line 512).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 255891.
browneee added a comment.

Fix formatting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang-tools-extra/clang-doc/Serialize.cpp
  clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/PrecompiledPreamble.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  clang/lib/Serialization/PCHContainerOperations.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ llvm/tools/llvm-cat/llvm-cat.cpp
@@ -54,7 +54,7 @@
   ExitOnError ExitOnErr("llvm-cat: ");
   LLVMContext Context;
 

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 255875.
browneee marked 2 inline comments as done and an inline comment as not done.
browneee added a comment.

Build fixes in additional projects.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang-tools-extra/clang-doc/Serialize.cpp
  clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/PrecompiledPreamble.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  clang/lib/Serialization/PCHContainerOperations.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ 

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 255856.
browneee added a comment.

Fix build errors. Missed -DLLVM_ENABLE_PROJECTS in previous local test builds.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/PrecompiledPreamble.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  clang/lib/Serialization/PCHContainerOperations.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ llvm/tools/llvm-cat/llvm-cat.cpp
@@ -54,7 +54,7 @@
   ExitOnError ExitOnErr("llvm-cat: ");
   LLVMContext Context;
 
-  SmallVector Buffer;
+  

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 255831.
browneee added a comment.

Fix more build errors.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/PrecompiledPreamble.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ llvm/tools/llvm-cat/llvm-cat.cpp
@@ -54,7 +54,7 @@
   ExitOnError ExitOnErr("llvm-cat: ");
   LLVMContext Context;
 
-  SmallVector Buffer;
+  std::vector Buffer;
   BitcodeWriter Writer(Buffer);
   if (BinaryCat) {
 for (const auto  : InputFilenames) {
Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp

[PATCH] D77621: Change BitcodeWriter buffer to std::vector instead of SmallVector.

2020-04-07 Thread Andrew via Phabricator via cfe-commits
browneee updated this revision to Diff 255773.
browneee marked an inline comment as done.
browneee added a comment.
Herald added subscribers: cfe-commits, arphaman.
Herald added a project: clang.

Fix build.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77621/new/

https://reviews.llvm.org/D77621

Files:
  clang/include/clang/Serialization/ASTWriter.h
  clang/include/clang/Serialization/PCHContainerOperations.h
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/lib/Serialization/GlobalModuleIndex.cpp
  llvm/include/llvm/Bitcode/BitcodeWriter.h
  llvm/include/llvm/Bitstream/BitstreamWriter.h
  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
  llvm/tools/llvm-cat/llvm-cat.cpp
  llvm/tools/llvm-modextract/llvm-modextract.cpp
  llvm/unittests/Bitstream/BitstreamReaderTest.cpp
  llvm/unittests/Bitstream/BitstreamWriterTest.cpp

Index: llvm/unittests/Bitstream/BitstreamWriterTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamWriterTest.cpp
+++ llvm/unittests/Bitstream/BitstreamWriterTest.cpp
@@ -8,27 +8,31 @@
 
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
+using ::testing::IsEmpty;
+
 namespace {
 
 TEST(BitstreamWriterTest, emitBlob) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str\0", 4), Buffer);
+  EXPECT_EQ(StringRef("str\0", 4), StringRef(Buffer.data(), Buffer.size()));
 }
 
 TEST(BitstreamWriterTest, emitBlobWithSize) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   {
 BitstreamWriter W(Buffer);
 W.emitBlob("str");
   }
-  SmallString<64> Expected;
+  std::vector Expected;
   {
 BitstreamWriter W(Expected);
 W.EmitVBR(3, 6);
@@ -38,21 +42,21 @@
 W.Emit('r', 8);
 W.Emit(0, 8);
   }
-  EXPECT_EQ(StringRef(Expected), Buffer);
+  EXPECT_EQ(Expected, Buffer);
 }
 
 TEST(BitstreamWriterTest, emitBlobEmpty) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef(""), Buffer);
+  EXPECT_THAT(Buffer, IsEmpty());
 }
 
 TEST(BitstreamWriterTest, emitBlob4ByteAligned) {
-  SmallString<64> Buffer;
+  std::vector Buffer;
   BitstreamWriter W(Buffer);
   W.emitBlob("str0", /* ShouldEmitSize */ false);
-  EXPECT_EQ(StringRef("str0"), Buffer);
+  EXPECT_EQ(StringRef("str0"), StringRef(Buffer.data(), Buffer.size()));
 }
 
 } // end namespace
Index: llvm/unittests/Bitstream/BitstreamReaderTest.cpp
===
--- llvm/unittests/Bitstream/BitstreamReaderTest.cpp
+++ llvm/unittests/Bitstream/BitstreamReaderTest.cpp
@@ -11,6 +11,8 @@
 #include "llvm/Bitstream/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
+#include 
+
 using namespace llvm;
 
 namespace {
@@ -96,7 +98,7 @@
 StringRef BlobIn((const char *)BlobData.begin(), BlobSize);
 
 // Write the bitcode.
-SmallVector Buffer;
+std::vector Buffer;
 unsigned AbbrevID;
 {
   BitstreamWriter Stream(Buffer);
@@ -115,7 +117,7 @@
 
 // Stream the buffer into the reader.
 BitstreamCursor Stream(
-ArrayRef((const uint8_t *)Buffer.begin(), Buffer.size()));
+ArrayRef((const uint8_t *)Buffer.data(), Buffer.size()));
 
 // Header.  Included in test so that we can run llvm-bcanalyzer to debug
 // when there are problems.
Index: llvm/tools/llvm-modextract/llvm-modextract.cpp
===
--- llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -58,12 +58,12 @@
   ExitOnErr(errorCodeToError(EC));
 
   if (BinaryExtract) {
-SmallVector Result;
+std::vector Result;
 BitcodeWriter Writer(Result);
-Result.append(Ms[ModuleIndex].getBuffer().begin(),
+Result.insert(Result.end(), Ms[ModuleIndex].getBuffer().begin(),
   Ms[ModuleIndex].getBuffer().end());
 Writer.copyStrtab(Ms[ModuleIndex].getStrtab());
-Out->os() << Result;
+Out->os().write(Result.data(), Result.size());
 Out->keep();
 return 0;
   }
Index: llvm/tools/llvm-cat/llvm-cat.cpp
===
--- llvm/tools/llvm-cat/llvm-cat.cpp
+++ llvm/tools/llvm-cat/llvm-cat.cpp
@@ -54,7 +54,7 @@
   ExitOnError ExitOnErr("llvm-cat: ");
   LLVMContext Context;
 
-  SmallVector Buffer;
+  std::vector Buffer;
   BitcodeWriter Writer(Buffer);
   if (BinaryCat) {
 for (const auto  :