This is the 3rd patch in the kit to improve our DSE implementation.
This patch supports trimming of the head or tail of a memset, memcpy or
memmove call. It's conceptually similar to trimming CONSTRUCTORS (and
was in fact developed first).
Try as I might, I couldn't find a BZ in our database that would be
resolved by this patch. There's BZs in the LLVM database in this space,
but I didn't actually test those. With that in mind, I don't think I
can strictly call this a bugfix. It does represent closing a deficiency
when compared to LLVM. So while I'd like to see it go onto the trunk,
I won't lose sleep if we defer to gcc-8.
Note this patch relies on the alignment tweak I mentioned in the
discussion of patch #2 to avoid creating code that the strlen folding
optimization can't optimize. The code is still correct/valid, it's just
in a form that the strlen folders don't grok.
This includes a trivial test that I used for development purposes. It
hits fairly often building GCC itself. If we wanted more coverage i the
testsuite, I could extract some tests from GCC and reduce them.
This patch has (of course) been bootstrapped and regression tested on
x86_64-linux-gnu. OK for the trunk or defer to gcc-8?
* tree-ssa-dse.c (need_ssa_update): New file scoped boolean.
(decrement_count): New function.
(increment_start_addr, trim_memstar_call): Likewise.
(trim_partially_dead_store): Call trim_memstar_call.
(pass_dse::execute): Initialize need_ssa_update. If set, then
return TODO_ssa_update.
* gcc.dg/tree-ssa/ssa-dse-25.c: New test.
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 1482c7f..b21b9b5 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -79,6 +80,10 @@ static bitmap need_eh_cleanup;
It is always safe to return FALSE. But typically better optimziation
can be achieved by analyzing more statements. */
+/* If trimming stores requires insertion of new statements, then we
+ will need an SSA update. */
+static bool need_ssa_update;
+
static bool
initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
{
@@ -309,6 +314,113 @@ trim_constructor_store (bitmap orig, bitmap live, gimple
*stmt)
}
}
+/* STMT is a memcpy, memmove or memset. Decrement the number of bytes
+ copied/set by DECREMENT. */
+static void
+decrement_count (gimple *stmt, int decrement)
+{
+ tree *countp = gimple_call_arg_ptr (stmt, 2);
+ gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
+ tree x = fold_build2 (MINUS_EXPR, TREE_TYPE (*countp), *countp,
+ build_int_cst (TREE_TYPE (*countp), decrement));
+ *countp = x;
+}
+
+static void
+increment_start_addr (gimple *stmt ATTRIBUTE_UNUSED, tree *where, int
increment)
+{
+ /* If the address wasn't initially a MEM_REF, make it a MEM_REF. */
+ if (TREE_CODE (*where) == ADDR_EXPR
+ && TREE_CODE (TREE_OPERAND (*where, 0)) != MEM_REF)
+ {
+ tree t = TREE_OPERAND (*where, 0);
+ t = build_ref_for_offset (EXPR_LOCATION (t), t,
+ increment * BITS_PER_UNIT, false,
+ ptr_type_node, NULL, false);
+ *where = build_fold_addr_expr (t);
+ return;
+ }
+ else if (TREE_CODE (*where) == SSA_NAME)
+ {
+ tree tem = make_ssa_name (TREE_TYPE (*where));
+ gassign *newop
+ = gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
+ build_int_cst (sizetype, increment));
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_insert_before (&gsi, newop, GSI_SAME_STMT);
+ need_ssa_update = true;
+ *where = tem;
+ update_stmt (gsi_stmt (gsi));
+ return;
+ }
+
+ /* We can just adjust the offset in the MEM_REF expression. */
+ tree x1 = TREE_OPERAND (TREE_OPERAND (*where, 0), 1);
+ tree x = fold_build2 (PLUS_EXPR, TREE_TYPE (x1), x1,
+ build_int_cst (TREE_TYPE (x1), increment));
+ TREE_OPERAND (TREE_OPERAND (*where, 0), 1) = x;
+
+}
+
+/* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead
+ (ORIG & ~NEW) and need not be stored. Try to rewrite STMT to reduce
+ the amount of data it actually writes.
+
+ Right now we only support trimming from the head or the tail of the
+ memory region. In theory we could split the mem* call, but it's
+ likely of marginal value. */
+
+static void
+trim_memstar_call (bitmap orig, bitmap live, gimple *stmt)
+{
+ switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+ {
+ case BUILT_IN_MEMCPY:
+ case BUILT_IN_MEMMOVE:
+ {
+ int head_trim, tail_trim;
+ compute_trims (orig, live, &head_trim, &tail_trim);
+
+ /* Tail trimming is easy, we can just reduce the count. */
+ if (tail_trim)
+ decrement_count (stmt, tail_trim);
+
+ /* Head trimming requires adjusting all the arguments. */
+ if (head_trim)
+ {
+ tree *dst = gimple_call_arg_ptr (stmt, 0);
+ increment_start_addr (stmt, dst, head_trim);
+ tree *src = gimple_call_arg_ptr (stmt, 1);
+ increment_start_addr (stmt, src, head_trim);
+ decrement_count (stmt, head_trim);
+ }
+ break;
+ }
+
+ case BUILT_IN_MEMSET:
+ {
+ int head_trim, tail_trim;
+ compute_trims (orig, live, &head_trim, &tail_trim);
+
+ /* Tail trimming is easy, we can just reduce the count. */
+ if (tail_trim)
+ decrement_count (stmt, tail_trim);
+
+ /* Head trimming requires adjusting all the arguments. */
+ if (head_trim)
+ {
+ tree *dst = gimple_call_arg_ptr (stmt, 0);
+ increment_start_addr (stmt, dst, head_trim);
+ decrement_count (stmt, head_trim);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
/* STMT is a memory write where one or more bytes written are dead
stores. ORIG is the bitmap of bytes stored by STMT. LIVE is the
bitmap of stores that are actually live.
@@ -321,7 +433,9 @@ trim_constructor_store (bitmap orig, bitmap live, gimple
*stmt)
static void
trim_partially_dead_store (bitmap orig, bitmap live, gimple *stmt)
{
- if (is_gimple_assign (stmt))
+ if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+ trim_memstar_call (orig, live, stmt);
+ else if (is_gimple_assign (stmt))
{
switch (gimple_assign_rhs_code (stmt))
{
@@ -712,6 +826,7 @@ unsigned int
pass_dse::execute (function *fun)
{
need_eh_cleanup = BITMAP_ALLOC (NULL);
+ need_ssa_update = false;
renumber_gimple_stmt_uids ();
@@ -738,7 +853,7 @@ pass_dse::execute (function *fun)
/* For now, just wipe the post-dominator information. */
free_dominance_info (CDI_POST_DOMINATORS);
- return 0;
+ return (need_ssa_update ? TODO_update_ssa : 0);
}
} // anon namespace
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
new file mode 100644
index 0000000..8b7db3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-25.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1-details -w" } */
+
+char z[32];
+
+
+int
+foo(void)
+{
+ memset (z, 0, 16);
+ memset (z+8, 0, 24);
+}
+
+/* { dg-final { scan-tree-dump "memset .&z, 0, 8." "dse1" } } */
+
+