Re: [RFA][PATCH 3/4] Trim mem* calls in DSE

2017-01-12 Thread Jeff Law


Richi indicated he wants this to be included in gcc-7.  So I've updated 
the mem* trimming patch with his comments.


The unnecessary SSA updates are gone.  Use wide_int_to_tree rather than 
building/folding expressions we know will result in constants.  Simplify 
incrementing the start address by re-folding the MEM_REF.


Again, these were almost verbatim drop-ins, so I'm going to assume the 
"looks ok" to be approval once prereqs are approved.


This has been bootstrapped and regression tested on top of patches 1a, 
1b, 2 and with patch #4 as well (patch #4 is not on the table for gcc-7 
and isn't being reposted at this point).


Jeff
* tree-ssa-dse.c (decrement_count): New function.
(increment_start_addr, maybe_trim_memstar_call): Likewise.
(dse_dom_walker::optimize_stmt): Call maybe_trim_memstar_call directly
when we know the partially dead statement is a mem* function.

* gcc.dg/tree-ssa/ssa-dse-25.c: New test.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
new file mode 100644
index 000..8b7db3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */
+
+char z[32];
+
+
+int
+foo(void)
+{
+  memset (z, 0, 16);
+  memset (z+8, 0, 24);
+}
+
+/* { dg-final { scan-tree-dump "memset ., 0, 8." "dse1" } } */
+
+
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 83ce29b..20cf3b4 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -332,6 +332,99 @@ maybe_trim_constructor_store (ao_ref *ref, sbitmap live, 
gimple *stmt)
 }
 }
 
+/* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
+   copied/set by DECREMENT.  */
+static void
+decrement_count (gimple *stmt, int decrement)
+{
+  tree *countp = gimple_call_arg_ptr (stmt, 2);
+  gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
+  *countp = wide_int_to_tree (TREE_TYPE (*countp), (TREE_INT_CST_LOW (*countp)
+   - decrement));
+
+}
+
+static void
+increment_start_addr (gimple *stmt, tree *where, int increment)
+{
+  if (TREE_CODE (*where) == SSA_NAME)
+{
+  tree tem = make_ssa_name (TREE_TYPE (*where));
+  gassign *newop
+= gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
+  build_int_cst (sizetype, increment));
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_insert_before (, newop, GSI_SAME_STMT);
+  *where = tem;
+  update_stmt (gsi_stmt (gsi));
+  return;
+}
+
+  *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
+ *where,
+ build_int_cst (ptr_type_node,
+increment)));
+}
+
+/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead
+   (ORIG & ~NEW) and need not be stored.  Try to rewrite STMT to reduce
+   the amount of data it actually writes.
+
+   Right now we only support trimming from the head or the tail of the
+   memory region.  In theory we could split the mem* call, but it's
+   likely of marginal value.  */
+
+static void
+maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt)
+{
+  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+{
+case BUILT_IN_MEMCPY:
+case BUILT_IN_MEMMOVE:
+  {
+   int head_trim, tail_trim;
+   compute_trims (ref, live, _trim, _trim);
+
+   /* Tail trimming is easy, we can just reduce the count.  */
+if (tail_trim)
+ decrement_count (stmt, tail_trim);
+
+   /* Head trimming requires adjusting all the arguments.  */
+if (head_trim)
+  {
+   tree *dst = gimple_call_arg_ptr (stmt, 0);
+   increment_start_addr (stmt, dst, head_trim);
+   tree *src = gimple_call_arg_ptr (stmt, 1);
+   increment_start_addr (stmt, src, head_trim);
+   decrement_count (stmt, head_trim);
+ }
+break;
+  }
+
+case BUILT_IN_MEMSET:
+  {
+   int head_trim, tail_trim;
+   compute_trims (ref, live, _trim, _trim);
+
+   /* Tail trimming is easy, we can just reduce the count.  */
+if (tail_trim)
+ decrement_count (stmt, tail_trim);
+
+   /* Head trimming requires adjusting all the arguments.  */
+if (head_trim)
+  {
+   tree *dst = gimple_call_arg_ptr (stmt, 0);
+   increment_start_addr (stmt, dst, head_trim);
+   decrement_count (stmt, head_trim);
+ }
+   break;
+  }
+
+  default:
+   break;
+}
+}
+
 /* STMT is a memory write where one or more bytes written are dead
stores.  ORIG is the bitmap of bytes stored by STMT.  LIVE is the
bitmap of stores that are actually live.
@@ -619,7 +712,7 @@ dse_dom_walker::dse_optimize_stmt 

Re: [RFA][PATCH 3/4] Trim mem* calls in DSE

2017-01-12 Thread Jeff Law

On 01/04/2017 07:04 AM, Richard Biener wrote:




* tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
(decrement_count): New function.
(increment_start_addr, trim_memstar_call): Likewise.
(trim_partially_dead_store): Call trim_memstar_call.
(pass_dse::execute): Initialize need_ssa_update.  If set, then
return TODO_ssa_update.

* gcc.dg/tree-ssa/ssa-dse-25.c: New test.

diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 1482c7f..b21b9b5 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
It is always safe to return FALSE.  But typically better optimziation
can be achieved by analyzing more statements.  */

+/* If trimming stores requires insertion of new statements, then we
+   will need an SSA update.  */
+static bool need_ssa_update;
+


huh?  You set this to true after inserting a POINTER_PLUS_EXPR, I don't see
how you need an SSA update for this.

It doesn't seem needed anymore.  I'm ripping that out.


+/* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
+   copied/set by DECREMENT.  */
+static void
+decrement_count (gimple *stmt, int decrement)
+{
+  tree *countp = gimple_call_arg_ptr (stmt, 2);
+  gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
+  tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
+   build_int_cst (TREE_TYPE (*countp), decrement));
+  *countp = x;


thanks to wide-int the following should work

   *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement);
*countp is still a tree, but we know its an INTEGER_CST, so we can 
extract its value trivially.



+}
+
+static void
+increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int
increment)
+{
+  /* If the address wasn't initially a MEM_REF, make it a MEM_REF.  */
+  if (TREE_CODE (*where) == ADDR_EXPR
+  && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
+{
+  tree t = TREE_OPERAND (*where, 0);
+  t = build_ref_for_offset (EXPR_LOCATION (t), t,
+   increment * BITS_PER_UNIT, false,
+   ptr_type_node, NULL, false);


please don't use build_ref_for_offset for this.  Simply only handle the SSA_NAME
case here and below ...
Done.  I'm pretty sure calling into build_ref_for_offset was to handle 
 kinds of cases and as you note, we can just re-fold the thing 
as a MEM_REF which simplifies everything a little.


Jeff


Re: [RFA][PATCH 3/4] Trim mem* calls in DSE

2017-01-11 Thread Jeff Law

On 01/04/2017 07:04 AM, Richard Biener wrote:


Didn't see a re-post of this one so reviewing the old.
Didn't figure mem* trimming was suitable for gcc-7 as I couldn't justify 
it as a bugfix, so I didn't ping it.


I don't think it changed materially.  All your comments are still 
applicable to the version in my tree.






* tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
(decrement_count): New function.
(increment_start_addr, trim_memstar_call): Likewise.
(trim_partially_dead_store): Call trim_memstar_call.
(pass_dse::execute): Initialize need_ssa_update.  If set, then
return TODO_ssa_update.

* gcc.dg/tree-ssa/ssa-dse-25.c: New test.

diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 1482c7f..b21b9b5 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
It is always safe to return FALSE.  But typically better optimziation
can be achieved by analyzing more statements.  */

+/* If trimming stores requires insertion of new statements, then we
+   will need an SSA update.  */
+static bool need_ssa_update;
+


huh?  You set this to true after inserting a POINTER_PLUS_EXPR, I don't see
how you need an SSA update for this.
I'll go back and re-investigate.  I could easily have goof'd the 
in-place update and be papering over that with the ssa update.







 static bool
 initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
 {
@@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live,
gimple *stmt)
 }
 }

+/* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
+   copied/set by DECREMENT.  */
+static void
+decrement_count (gimple *stmt, int decrement)
+{
+  tree *countp = gimple_call_arg_ptr (stmt, 2);
+  gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
+  tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
+   build_int_cst (TREE_TYPE (*countp), decrement));
+  *countp = x;


thanks to wide-int the following should work

   *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement);

Sweet.  I like that much better.



(if not please use int_const_binop rather than fold_build2 here and
below as well)


+}
+
+static void
+increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int
increment)
+{
+  /* If the address wasn't initially a MEM_REF, make it a MEM_REF.  */
+  if (TREE_CODE (*where) == ADDR_EXPR
+  && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
+{
+  tree t = TREE_OPERAND (*where, 0);
+  t = build_ref_for_offset (EXPR_LOCATION (t), t,
+   increment * BITS_PER_UNIT, false,
+   ptr_type_node, NULL, false);


please don't use build_ref_for_offset for this.  Simply only handle the SSA_NAME
case here and below ...
I think build_ref_for_offset was what spurred the tree-sra.h inclusion. 
IIRC I was seeing a goodly number of cases where the argument wasn't a 
MEM_REF or SSA_NAME at this point.  But I'll double-check.


If we don't need build_ref_for_offset, do you still want me to pull its 
prototype into the new tree-sra.h, or just leave it as-is?







+  *where = build_fold_addr_expr (t);
+  return;
+}
+  else if (TREE_CODE (*where) == SSA_NAME)
+{
+  tree tem = make_ssa_name (TREE_TYPE (*where));
+  gassign *newop
+= gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
+  build_int_cst (sizetype, increment));
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_insert_before (, newop, GSI_SAME_STMT);
+  need_ssa_update = true;
+  *where = tem;
+  update_stmt (gsi_stmt (gsi));
+  return;
+}
+
+  /* We can just adjust the offset in the MEM_REF expression.  */
+  tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1);
+  tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1,
+   build_int_cst (TREE_TYPE (x1), increment));
+  TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x;

...

re-fold the thing as MEM_REF which will do all the magic for you:

  *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
*where, build_int_cst (ptr_type_node, increment)));

that handles [] and  just fine and avoids adding magic here.
And that () is likely what I was looking to handle with the 
first if clause above where I called build_ref_for_offset.




Otherwise looks ok.  I think I'd like to see this in GCC 7 given it's
so much similar to the constructor pruning.
OK.  I'll sort through the issues noted above and get this one reposted 
as well.


jeff



Re: [RFA][PATCH 3/4] Trim mem* calls in DSE

2017-01-04 Thread Richard Biener
On Fri, Dec 16, 2016 at 2:54 AM, Jeff Law  wrote:
> This is the 3rd patch in the kit to improve our DSE implementation.
>
> This patch supports trimming of the head or tail of a memset, memcpy or
> memmove call.  It's conceptually similar to trimming CONSTRUCTORS (and was
> in fact developed first).
>
> Try as I might, I couldn't find a BZ in our database that would be resolved
> by this patch.  There's BZs in the LLVM database in this space, but I didn't
> actually test those.  With that in mind, I don't think I can strictly call
> this a bugfix.  It does represent closing a deficiency when compared to
> LLVM.So while I'd like to see it go onto the trunk, I won't lose sleep
> if we defer to gcc-8.
>
> Note this patch relies on the alignment tweak I mentioned in the discussion
> of patch #2 to avoid creating code that the strlen folding optimization
> can't optimize.  The code is still correct/valid, it's just in a form that
> the strlen folders don't grok.
>
> This includes a trivial test that I used for development purposes.  It hits
> fairly often building GCC itself.  If we wanted more coverage i the
> testsuite, I could extract some tests from GCC and reduce them.
>
> This patch has (of course) been bootstrapped and regression tested on
> x86_64-linux-gnu.  OK for the trunk or defer to gcc-8?

Didn't see a re-post of this one so reviewing the old.

>
> * tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
> (decrement_count): New function.
> (increment_start_addr, trim_memstar_call): Likewise.
> (trim_partially_dead_store): Call trim_memstar_call.
> (pass_dse::execute): Initialize need_ssa_update.  If set, then
> return TODO_ssa_update.
>
> * gcc.dg/tree-ssa/ssa-dse-25.c: New test.
>
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 1482c7f..b21b9b5 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
> It is always safe to return FALSE.  But typically better optimziation
> can be achieved by analyzing more statements.  */
>
> +/* If trimming stores requires insertion of new statements, then we
> +   will need an SSA update.  */
> +static bool need_ssa_update;
> +

huh?  You set this to true after inserting a POINTER_PLUS_EXPR, I don't see
how you need an SSA update for this.

>  static bool
>  initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
>  {
> @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live,
> gimple *stmt)
>  }
>  }
>
> +/* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
> +   copied/set by DECREMENT.  */
> +static void
> +decrement_count (gimple *stmt, int decrement)
> +{
> +  tree *countp = gimple_call_arg_ptr (stmt, 2);
> +  gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
> +  tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
> +   build_int_cst (TREE_TYPE (*countp), decrement));
> +  *countp = x;

thanks to wide-int the following should work

   *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement);

(if not please use int_const_binop rather than fold_build2 here and
below as well)

> +}
> +
> +static void
> +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int
> increment)
> +{
> +  /* If the address wasn't initially a MEM_REF, make it a MEM_REF.  */
> +  if (TREE_CODE (*where) == ADDR_EXPR
> +  && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
> +{
> +  tree t = TREE_OPERAND (*where, 0);
> +  t = build_ref_for_offset (EXPR_LOCATION (t), t,
> +   increment * BITS_PER_UNIT, false,
> +   ptr_type_node, NULL, false);

please don't use build_ref_for_offset for this.  Simply only handle the SSA_NAME
case here and below ...

> +  *where = build_fold_addr_expr (t);
> +  return;
> +}
> +  else if (TREE_CODE (*where) == SSA_NAME)
> +{
> +  tree tem = make_ssa_name (TREE_TYPE (*where));
> +  gassign *newop
> += gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
> +  build_int_cst (sizetype, increment));
> +  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +  gsi_insert_before (, newop, GSI_SAME_STMT);
> +  need_ssa_update = true;
> +  *where = tem;
> +  update_stmt (gsi_stmt (gsi));
> +  return;
> +}
> +
> +  /* We can just adjust the offset in the MEM_REF expression.  */
> +  tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1);
> +  tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1,
> +   build_int_cst (TREE_TYPE (x1), increment));
> +  TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x;
...

re-fold the thing as MEM_REF which will do all the magic for you:

  *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
*where, build_int_cst (ptr_type_node, increment)));

that handles [] and  just fine and avoids adding magic 

[RFA][PATCH 3/4] Trim mem* calls in DSE

2016-12-15 Thread Jeff Law

This is the 3rd patch in the kit to improve our DSE implementation.

This patch supports trimming of the head or tail of a memset, memcpy or 
memmove call.  It's conceptually similar to trimming CONSTRUCTORS (and 
was in fact developed first).


Try as I might, I couldn't find a BZ in our database that would be 
resolved by this patch.  There's BZs in the LLVM database in this space, 
but I didn't actually test those.  With that in mind, I don't think I 
can strictly call this a bugfix.  It does represent closing a deficiency 
when compared to LLVM.So while I'd like to see it go onto the trunk, 
I won't lose sleep if we defer to gcc-8.


Note this patch relies on the alignment tweak I mentioned in the 
discussion of patch #2 to avoid creating code that the strlen folding 
optimization can't optimize.  The code is still correct/valid, it's just 
in a form that the strlen folders don't grok.


This includes a trivial test that I used for development purposes.  It 
hits fairly often building GCC itself.  If we wanted more coverage i the 
testsuite, I could extract some tests from GCC and reduce them.


This patch has (of course) been bootstrapped and regression tested on 
x86_64-linux-gnu.  OK for the trunk or defer to gcc-8?


* tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
(decrement_count): New function.
(increment_start_addr, trim_memstar_call): Likewise.
(trim_partially_dead_store): Call trim_memstar_call.
(pass_dse::execute): Initialize need_ssa_update.  If set, then
return TODO_ssa_update.

* gcc.dg/tree-ssa/ssa-dse-25.c: New test.
 
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 1482c7f..b21b9b5 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
It is always safe to return FALSE.  But typically better optimziation
can be achieved by analyzing more statements.  */
 
+/* If trimming stores requires insertion of new statements, then we
+   will need an SSA update.  */
+static bool need_ssa_update;
+
 static bool
 initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
 {
@@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, gimple 
*stmt)
 }
 }
 
+/* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
+   copied/set by DECREMENT.  */
+static void
+decrement_count (gimple *stmt, int decrement)
+{
+  tree *countp = gimple_call_arg_ptr (stmt, 2);
+  gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
+  tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
+   build_int_cst (TREE_TYPE (*countp), decrement));
+  *countp = x;
+}
+
+static void
+increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int 
increment)
+{
+  /* If the address wasn't initially a MEM_REF, make it a MEM_REF.  */
+  if (TREE_CODE (*where) == ADDR_EXPR
+  && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
+{
+  tree t = TREE_OPERAND (*where, 0);
+  t = build_ref_for_offset (EXPR_LOCATION (t), t,
+   increment * BITS_PER_UNIT, false,
+   ptr_type_node, NULL, false);
+  *where = build_fold_addr_expr (t);
+  return;
+}
+  else if (TREE_CODE (*where) == SSA_NAME)
+{
+  tree tem = make_ssa_name (TREE_TYPE (*where));
+  gassign *newop
+= gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
+  build_int_cst (sizetype, increment));
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_insert_before (, newop, GSI_SAME_STMT);
+  need_ssa_update = true;
+  *where = tem;
+  update_stmt (gsi_stmt (gsi));
+  return;
+}
+
+  /* We can just adjust the offset in the MEM_REF expression.  */
+  tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1);
+  tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1,
+   build_int_cst (TREE_TYPE (x1), increment));
+  TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x;
+
+}
+
+/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead
+   (ORIG & ~NEW) and need not be stored.  Try to rewrite STMT to reduce
+   the amount of data it actually writes.
+
+   Right now we only support trimming from the head or the tail of the
+   memory region.  In theory we could split the mem* call, but it's
+   likely of marginal value.  */
+
+static void
+trim_memstar_call (bitmap orig, bitmap live, gimple *stmt)
+{
+  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+{
+case BUILT_IN_MEMCPY:
+case BUILT_IN_MEMMOVE:
+  {
+   int head_trim, tail_trim;
+   compute_trims (orig, live, _trim, _trim);
+
+   /* Tail trimming is easy, we can just reduce the count.  */
+if (tail_trim)
+ decrement_count (stmt, tail_trim);
+
+   /* Head trimming requires adjusting all the arguments.  */
+if (head_trim)
+  {
+   tree *dst = gimple_call_arg_ptr