On Fri, Dec 16, 2016 at 2:54 AM, Jeff Law <l...@redhat.com> wrote: > This is the 3rd patch in the kit to improve our DSE implementation. > > This patch supports trimming of the head or tail of a memset, memcpy or > memmove call. It's conceptually similar to trimming CONSTRUCTORS (and was > in fact developed first). > > Try as I might, I couldn't find a BZ in our database that would be resolved > by this patch. There's BZs in the LLVM database in this space, but I didn't > actually test those. With that in mind, I don't think I can strictly call > this a bugfix. It does represent closing a deficiency when compared to > LLVM. So while I'd like to see it go onto the trunk, I won't lose sleep > if we defer to gcc-8. > > Note this patch relies on the alignment tweak I mentioned in the discussion > of patch #2 to avoid creating code that the strlen folding optimization > can't optimize. The code is still correct/valid, it's just in a form that > the strlen folders don't grok. > > This includes a trivial test that I used for development purposes. It hits > fairly often building GCC itself. If we wanted more coverage i the > testsuite, I could extract some tests from GCC and reduce them. > > This patch has (of course) been bootstrapped and regression tested on > x86_64-linux-gnu. OK for the trunk or defer to gcc-8?
Didn't see a re-post of this one so reviewing the old. > > * tree-ssa-dse.c (need_ssa_update): New file scoped boolean. > (decrement_count): New function. > (increment_start_addr, trim_memstar_call): Likewise. > (trim_partially_dead_store): Call trim_memstar_call. > (pass_dse::execute): Initialize need_ssa_update. If set, then > return TODO_ssa_update. > > * gcc.dg/tree-ssa/ssa-dse-25.c: New test. > > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index 1482c7f..b21b9b5 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -79,6 +80,10 @@ static bitmap need_eh_cleanup; > It is always safe to return FALSE. But typically better optimziation > can be achieved by analyzing more statements. */ > > +/* If trimming stores requires insertion of new statements, then we > + will need an SSA update. */ > +static bool need_ssa_update; > + huh? You set this to true after inserting a POINTER_PLUS_EXPR, I don't see how you need an SSA update for this. > static bool > initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) > { > @@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, > gimple *stmt) > } > } > > +/* STMT is a memcpy, memmove or memset. Decrement the number of bytes > + copied/set by DECREMENT. */ > +static void > +decrement_count (gimple *stmt, int decrement) > +{ > + tree *countp = gimple_call_arg_ptr (stmt, 2); > + gcc_assert (TREE_CODE (*countp) == INTEGER_CST); > + tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp, > + build_int_cst (TREE_TYPE (*countp), decrement)); > + *countp = x; thanks to wide-int the following should work *countp = wide_int_to_tree (TREE_TYPE (*countp), *countp - decrement); (if not please use int_const_binop rather than fold_build2 here and below as well) > +} > + > +static void > +increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int > increment) > +{ > + /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */ > + if (TREE_CODE (*where) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF) > + { > + tree t = TREE_OPERAND (*where, 0); > + t = build_ref_for_offset (EXPR_LOCATION (t), t, > + increment * BITS_PER_UNIT, false, > + ptr_type_node, NULL, false); please don't use build_ref_for_offset for this. Simply only handle the SSA_NAME case here and below ... > + *where = build_fold_addr_expr (t); > + return; > + } > + else if (TREE_CODE (*where) == SSA_NAME) > + { > + tree tem = make_ssa_name (TREE_TYPE (*where)); > + gassign *newop > + = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where, > + build_int_cst (sizetype, increment)); > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > + gsi_insert_before (&gsi, newop, GSI_SAME_STMT); > + need_ssa_update = true; > + *where = tem; > + update_stmt (gsi_stmt (gsi)); > + return; > + } > + > + /* We can just adjust the offset in the MEM_REF expression. */ > + tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1); > + tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1, > + build_int_cst (TREE_TYPE (x1), increment)); > + TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x; ... re-fold the thing as MEM_REF which will do all the magic for you: *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node, *where, build_int_cst (ptr_type_node, increment))); that handles &MEM[] and &foo.bar just fine and avoids adding magic here. Otherwise looks ok. I think I'd like to see this in GCC 7 given it's so much similar to the constructor pruning. Richard. > + > +} > + > +/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are > dead > + (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce > + the amount of data it actually writes. > + > + Right now we only support trimming from the head or the tail of the > + memory region. In theory we could split the mem* call, but it's > + likely of marginal value. */ > + > +static void > +trim_memstar_call (bitmap orig, bitmap live, gimple *stmt) > +{ > + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) > + { > + case BUILT_IN_MEMCPY: > + case BUILT_IN_MEMMOVE: > + { > + int head_trim, tail_trim; > + compute_trims (orig, live, &head_trim, &tail_trim); > + > + /* Tail trimming is easy, we can just reduce the count. */ > + if (tail_trim) > + decrement_count (stmt, tail_trim); > + > + /* Head trimming requires adjusting all the arguments. */ > + if (head_trim) > + { > + tree *dst = gimple_call_arg_ptr (stmt, 0); > + increment_start_addr (stmt, dst, head_trim); > + tree *src = gimple_call_arg_ptr (stmt, 1); > + increment_start_addr (stmt, src, head_trim); > + decrement_count (stmt, head_trim); > + } > + break; > + } > + > + case BUILT_IN_MEMSET: > + { > + int head_trim, tail_trim; > + compute_trims (orig, live, &head_trim, &tail_trim); > + > + /* Tail trimming is easy, we can just reduce the count. */ > + if (tail_trim) > + decrement_count (stmt, tail_trim); > + > + /* Head trimming requires adjusting all the arguments. */ > + if (head_trim) > + { > + tree *dst = gimple_call_arg_ptr (stmt, 0); > + increment_start_addr (stmt, dst, head_trim); > + decrement_count (stmt, head_trim); > + } > + break; > + } > + > + default: > + break; > + } > +} > + > /* STMT is a memory write where one or more bytes written are dead > stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the > bitmap of stores that are actually live. > @@ -321,7 +433,9 @@ trim_constructor_store (bitmap orig, bitmap live, gimple > *stmt) > static void > trim_partially_dead_store (bitmap orig, bitmap live, gimple *stmt) > { > - if (is_gimple_assign (stmt)) > + if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) > + trim_memstar_call (orig, live, stmt); > + else if (is_gimple_assign (stmt)) > { > switch (gimple_assign_rhs_code (stmt)) > { > @@ -712,6 +826,7 @@ unsigned int > pass_dse::execute (function *fun) > { > need_eh_cleanup = BITMAP_ALLOC (NULL); > + need_ssa_update = false; > > renumber_gimple_stmt_uids (); > > @@ -738,7 +853,7 @@ pass_dse::execute (function *fun) > > /* For now, just wipe the post-dominator information. */ > free_dominance_info (CDI_POST_DOMINATORS); > - return 0; > + return (need_ssa_update ? TODO_update_ssa : 0); > } > > } // anon namespace > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > new file mode 100644 > index 0000000..8b7db3a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */ > + > +char z[32]; > + > + > +int > +foo(void) > +{ > + memset (z, 0, 16); > + memset (z+8, 0, 24); > +} > + > +/* { dg-final { scan-tree-dump "memset .&z, 0, 8." "dse1" } } */ > + > + >