Re: [RFA][PATCH 3/4] Trim mem* calls in DSE
Richi indicated he wants this to be included in gcc-7. So I've updated the mem* trimming patch with his comments. The unnecessary SSA updates are gone. Use wide_int_to_tree rather than building/folding expressions we know will result in constants. Simplify incrementing the start address by re-folding the MEM_REF. Again, these were almost verbatim drop-ins, so I'm going to assume the "looks ok" to be approval once prereqs are approved. This has been bootstrapped and regression tested on top of patches 1a, 1b, 2 and with patch #4 as well (patch #4 is not on the table for gcc-7 and isn't being reposted at this point). Jeff * tree-ssa-dse.c (decrement_count): New function. (increment_start_addr, maybe_trim_memstar_call): Likewise. (dse_dom_walker::optimize_stmt): Call maybe_trim_memstar_call directly when we know the partially dead statement is a mem* function. * gcc.dg/tree-ssa/ssa-dse-25.c: New test. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c new file mode 100644 index 000..8b7db3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */ + +char z[32]; + + +int +foo(void) +{ + memset (z, 0, 16); + memset (z+8, 0, 24); +} + +/* { dg-final { scan-tree-dump "memset ., 0, 8." "dse1" } } */ + + diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 83ce29b..20cf3b4 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -332,6 +332,99 @@ maybe_trim_constructor_store (ao_ref *ref, sbitmap live, gimple *stmt) } } +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes + copied/set by DECREMENT. */ +static void +decrement_count (gimple *stmt, int decrement) +{ + tree *countp = gimple_call_arg_ptr (stmt, 2); + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); + *countp = wide_int_to_tree (TREE_TYPE (*countp), (TREE_INT_CST_LOW (*countp) + - decrement)); + +} + +static void +increment_start_addr (gimple *stmt, tree *where, int increment) +{ + if (TREE_CODE (*where) == SSA_NAME) +{ + tree tem = make_ssa_name (TREE_TYPE (*where)); + gassign *newop += gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, + build_int_cst (sizetype, increment)); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (, newop, GSI_SAME_STMT); + *where = tem; + update_stmt (gsi_stmt (gsi)); + return; +} + + *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, + *where, + build_int_cst (ptr_type_node, +increment))); +} + +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce + the amount of data it actually writes. + + Right now we only support trimming from the head or the tail of the + memory region. In theory we could split the mem* call, but it's + likely of marginal value. */ + +static void +maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt) +{ + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) +{ +case BUILT_IN_MEMCPY: +case BUILT_IN_MEMMOVE: + { + int head_trim, tail_trim; + compute_trims (ref, live, _trim, _trim); + + /* Tail trimming is easy, we can just reduce the count. */ +if (tail_trim) + decrement_count (stmt, tail_trim); + + /* Head trimming requires adjusting all the arguments. */ +if (head_trim) + { + tree *dst = gimple_call_arg_ptr (stmt, 0); + increment_start_addr (stmt, dst, head_trim); + tree *src = gimple_call_arg_ptr (stmt, 1); + increment_start_addr (stmt, src, head_trim); + decrement_count (stmt, head_trim); + } +break; + } + +case BUILT_IN_MEMSET: + { + int head_trim, tail_trim; + compute_trims (ref, live, _trim, _trim); + + /* Tail trimming is easy, we can just reduce the count. */ +if (tail_trim) + decrement_count (stmt, tail_trim); + + /* Head trimming requires adjusting all the arguments. */ +if (head_trim) + { + tree *dst = gimple_call_arg_ptr (stmt, 0); + increment_start_addr (stmt, dst, head_trim); + decrement_count (stmt, head_trim); + } + break; + } + + default: + break; +} +} + /* STMT is a memory write where one or more bytes written are dead stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the bitmap of stores that are actually live. @@ -619,7 +712,7 @@ dse_dom_walker::dse_optimize_stmt
Re: [RFA][PATCH 3/4] Trim mem* calls in DSE
On 01/04/2017 07:04 AM, Richard Biener wrote: * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. (decrement_count): New function. (increment_start_addr, trim_memstar_call): Likewise. (trim_partially_dead_store): Call trim_memstar_call. (pass_dse::execute): Initialize need_ssa_update. If set, then return TODO_ssa_update. * gcc.dg/tree-ssa/ssa-dse-25.c: New test. diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 1482c7f..b21b9b5 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; It is always safe to return FALSE. But typically better optimziation can be achieved by analyzing more statements. */ +/* If trimming stores requires insertion of new statements, then we + will need an SSA update. */ +static bool need_ssa_update; + huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see how you need an SSA update for this. It doesn't seem needed anymore. I'm ripping that out. +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes + copied/set by DECREMENT. */ +static void +decrement_count (gimple *stmt, int decrement) +{ + tree *countp = gimple_call_arg_ptr (stmt, 2); + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, + build_int_cst (TREE_TYPE (*countp), decrement)); + *countp = x; thanks to wide-int the following should work *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); *countp is still a tree, but we know its an INTEGER_CST, so we can extract its value trivially. +} + +static void +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int increment) +{ + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ + if (TREE_CODE (*where) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) +{ + tree t = TREE_OPERAND (*where, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + increment * BITS_PER_UNIT, false, + ptr_type_node, NULL, false); please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME case here and below ... Done. I'm pretty sure calling into build_ref_for_offset was to handle kinds of cases and as you note, we can just re-fold the thing as a MEM_REF which simplifies everything a little. Jeff
Re: [RFA][PATCH 3/4] Trim mem* calls in DSE
On 01/04/2017 07:04 AM, Richard Biener wrote: Didn't see a re-post of this one so reviewing the old. Didn't figure mem* trimming was suitable for gcc-7 as I couldn't justify it as a bugfix, so I didn't ping it. I don't think it changed materially. All your comments are still applicable to the version in my tree. * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. (decrement_count): New function. (increment_start_addr, trim_memstar_call): Likewise. (trim_partially_dead_store): Call trim_memstar_call. (pass_dse::execute): Initialize need_ssa_update. If set, then return TODO_ssa_update. * gcc.dg/tree-ssa/ssa-dse-25.c: New test. diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 1482c7f..b21b9b5 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; It is always safe to return FALSE. But typically better optimziation can be achieved by analyzing more statements. */ +/* If trimming stores requires insertion of new statements, then we + will need an SSA update. */ +static bool need_ssa_update; + huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see how you need an SSA update for this. I'll go back and re-investigate. I could easily have goof'd the in-place update and be papering over that with the ssa update. static bool initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) { @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, gimple *stmt) } } +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes + copied/set by DECREMENT. */ +static void +decrement_count (gimple *stmt, int decrement) +{ + tree *countp = gimple_call_arg_ptr (stmt, 2); + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, + build_int_cst (TREE_TYPE (*countp), decrement)); + *countp = x; thanks to wide-int the following should work *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); Sweet. I like that much better. (if not please use int_const_binop rather than fold_build2 here and below as well) +} + +static void +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int increment) +{ + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ + if (TREE_CODE (*where) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) +{ + tree t = TREE_OPERAND (*where, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + increment * BITS_PER_UNIT, false, + ptr_type_node, NULL, false); please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME case here and below ... I think build_ref_for_offset was what spurred the tree-sra.h inclusion. IIRC I was seeing a goodly number of cases where the argument wasn't a MEM_REF or SSA_NAME at this point. But I'll double-check. If we don't need build_ref_for_offset, do you still want me to pull its prototype into the new tree-sra.h, or just leave it as-is? + *where = build_fold_addr_expr (t); + return; +} + else if (TREE_CODE (*where) == SSA_NAME) +{ + tree tem = make_ssa_name (TREE_TYPE (*where)); + gassign *newop += gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, + build_int_cst (sizetype, increment)); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (, newop, GSI_SAME_STMT); + need_ssa_update = true; + *where = tem; + update_stmt (gsi_stmt (gsi)); + return; +} + + /* We can just adjust the offset in the MEM_REF expression. */ + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, + build_int_cst (TREE_TYPE (x1), increment)); + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; ... re-fold the thing as MEM_REF which will do all the magic for you: *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, *where, build_int_cst (ptr_type_node, increment))); that handles [] and just fine and avoids adding magic here. And that () is likely what I was looking to handle with the first if clause above where I called build_ref_for_offset. Otherwise looks ok. I think I'd like to see this in GCC 7 given it's so much similar to the constructor pruning. OK. I'll sort through the issues noted above and get this one reposted as well. jeff
Re: [RFA][PATCH 3/4] Trim mem* calls in DSE
On Fri, Dec 16, 2016 at 2:54 AM, Jeff Lawwrote: > This is the 3rd patch in the kit to improve our DSE implementation. > > This patch supports trimming of the head or tail of a memset, memcpy or > memmove call. It's conceptually similar to trimming CONSTRUCTORS (and was > in fact developed first). > > Try as I might, I couldn't find a BZ in our database that would be resolved > by this patch. There's BZs in the LLVM database in this space, but I didn't > actually test those. With that in mind, I don't think I can strictly call > this a bugfix. It does represent closing a deficiency when compared to > LLVM.So while I'd like to see it go onto the trunk, I won't lose sleep > if we defer to gcc-8. > > Note this patch relies on the alignment tweak I mentioned in the discussion > of patch #2 to avoid creating code that the strlen folding optimization > can't optimize. The code is still correct/valid, it's just in a form that > the strlen folders don't grok. > > This includes a trivial test that I used for development purposes. It hits > fairly often building GCC itself. If we wanted more coverage i the > testsuite, I could extract some tests from GCC and reduce them. > > This patch has (of course) been bootstrapped and regression tested on > x86_64-linux-gnu. OK for the trunk or defer to gcc-8? Didn't see a re-post of this one so reviewing the old. > > * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. > (decrement_count): New function. > (increment_start_addr, trim_memstar_call): Likewise. > (trim_partially_dead_store): Call trim_memstar_call. > (pass_dse::execute): Initialize need_ssa_update. If set, then > return TODO_ssa_update. > > * gcc.dg/tree-ssa/ssa-dse-25.c: New test. > > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index 1482c7f..b21b9b5 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; > It is always safe to return FALSE. But typically better optimziation > can be achieved by analyzing more statements. */ > > +/* If trimming stores requires insertion of new statements, then we > + will need an SSA update. */ > +static bool need_ssa_update; > + huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see how you need an SSA update for this. > static bool > initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) > { > @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, > gimple *stmt) > } > } > > +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes > + copied/set by DECREMENT. */ > +static void > +decrement_count (gimple *stmt, int decrement) > +{ > + tree *countp = gimple_call_arg_ptr (stmt, 2); > + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); > + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, > + build_int_cst (TREE_TYPE (*countp), decrement)); > + *countp = x; thanks to wide-int the following should work *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); (if not please use int_const_binop rather than fold_build2 here and below as well) > +} > + > +static void > +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int > increment) > +{ > + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ > + if (TREE_CODE (*where) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) > +{ > + tree t = TREE_OPERAND (*where, 0); > + t = build_ref_for_offset (EXPR_LOCATION (t), t, > + increment * BITS_PER_UNIT, false, > + ptr_type_node, NULL, false); please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME case here and below ... > + *where = build_fold_addr_expr (t); > + return; > +} > + else if (TREE_CODE (*where) == SSA_NAME) > +{ > + tree tem = make_ssa_name (TREE_TYPE (*where)); > + gassign *newop > += gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, > + build_int_cst (sizetype, increment)); > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > + gsi_insert_before (, newop, GSI_SAME_STMT); > + need_ssa_update = true; > + *where = tem; > + update_stmt (gsi_stmt (gsi)); > + return; > +} > + > + /* We can just adjust the offset in the MEM_REF expression. */ > + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); > + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, > + build_int_cst (TREE_TYPE (x1), increment)); > + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; ... re-fold the thing as MEM_REF which will do all the magic for you: *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, *where, build_int_cst (ptr_type_node, increment))); that handles [] and just fine and avoids adding magic
[RFA][PATCH 3/4] Trim mem* calls in DSE
This is the 3rd patch in the kit to improve our DSE implementation. This patch supports trimming of the head or tail of a memset, memcpy or memmove call. It's conceptually similar to trimming CONSTRUCTORS (and was in fact developed first). Try as I might, I couldn't find a BZ in our database that would be resolved by this patch. There's BZs in the LLVM database in this space, but I didn't actually test those. With that in mind, I don't think I can strictly call this a bugfix. It does represent closing a deficiency when compared to LLVM.So while I'd like to see it go onto the trunk, I won't lose sleep if we defer to gcc-8. Note this patch relies on the alignment tweak I mentioned in the discussion of patch #2 to avoid creating code that the strlen folding optimization can't optimize. The code is still correct/valid, it's just in a form that the strlen folders don't grok. This includes a trivial test that I used for development purposes. It hits fairly often building GCC itself. If we wanted more coverage i the testsuite, I could extract some tests from GCC and reduce them. This patch has (of course) been bootstrapped and regression tested on x86_64-linux-gnu. OK for the trunk or defer to gcc-8? * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. (decrement_count): New function. (increment_start_addr, trim_memstar_call): Likewise. (trim_partially_dead_store): Call trim_memstar_call. (pass_dse::execute): Initialize need_ssa_update. If set, then return TODO_ssa_update. * gcc.dg/tree-ssa/ssa-dse-25.c: New test. diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c index 1482c7f..b21b9b5 100644 --- a/gcc/tree-ssa-dse.c +++ b/gcc/tree-ssa-dse.c @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; It is always safe to return FALSE. But typically better optimziation can be achieved by analyzing more statements. */ +/* If trimming stores requires insertion of new statements, then we + will need an SSA update. */ +static bool need_ssa_update; + static bool initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) { @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, gimple *stmt) } } +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes + copied/set by DECREMENT. */ +static void +decrement_count (gimple *stmt, int decrement) +{ + tree *countp = gimple_call_arg_ptr (stmt, 2); + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, + build_int_cst (TREE_TYPE (*countp), decrement)); + *countp = x; +} + +static void +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int increment) +{ + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ + if (TREE_CODE (*where) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) +{ + tree t = TREE_OPERAND (*where, 0); + t = build_ref_for_offset (EXPR_LOCATION (t), t, + increment * BITS_PER_UNIT, false, + ptr_type_node, NULL, false); + *where = build_fold_addr_expr (t); + return; +} + else if (TREE_CODE (*where) == SSA_NAME) +{ + tree tem = make_ssa_name (TREE_TYPE (*where)); + gassign *newop += gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, + build_int_cst (sizetype, increment)); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (, newop, GSI_SAME_STMT); + need_ssa_update = true; + *where = tem; + update_stmt (gsi_stmt (gsi)); + return; +} + + /* We can just adjust the offset in the MEM_REF expression. */ + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, + build_int_cst (TREE_TYPE (x1), increment)); + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; + +} + +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce + the amount of data it actually writes. + + Right now we only support trimming from the head or the tail of the + memory region. In theory we could split the mem* call, but it's + likely of marginal value. */ + +static void +trim_memstar_call (bitmap orig, bitmap live, gimple *stmt) +{ + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) +{ +case BUILT_IN_MEMCPY: +case BUILT_IN_MEMMOVE: + { + int head_trim, tail_trim; + compute_trims (orig, live, _trim, _trim); + + /* Tail trimming is easy, we can just reduce the count. */ +if (tail_trim) + decrement_count (stmt, tail_trim); + + /* Head trimming requires adjusting all the arguments. */ +if (head_trim) + { + tree *dst = gimple_call_arg_ptr