The patch below extends memset recognition to cover a few more
non-byte-size store loops and all byte-size store loops. This exposes
issues with our builtins.exp testsuite which has custom memset
routines like
/* Hand-written byte-wise fill, quoted as an example of the testsuite's
   custom memset routines: stores C (converted to char) into each of the
   N bytes starting at D, one byte per loop iteration, and returns D.
   The loop body is a single byte-sized store, which is the pattern the
   extended memset recognition described in this mail now covers.  */
void *
my_memset (void *d, int c, size_t n)
{
char *dst = (char *) d;
/* Runs N times; each iteration writes one byte and advances DST.  */
while (n--)
*dst++ = c;
/* Return the original destination pointer, not the advanced DST.  */
return (char *) d;
}
Now, for LTO we have papered over similar issues by attaching
the used attribute to the functions. But the general question is - when
can we be sure the function we are dealing with is not the actual
implementation for the builtin call we want to generate? A few
things come to my mind:
1) the function already calls the function we want to generate (well,
it might be a tail-recursive memset implementation ...)
2) the function availability is AVAIL_LOCAL
3) ... ?
For sure 2) would work, but it would severely restrict the transform
(do we care?).
We have a similar issue with sin/cos -> sincos transform and a
trivial sincos implementation.
Any ideas?
Bootstrapped (with memset recognition enabled by default) and tested
on x86_64-unknown-linux-gnu with the aforementioned issues.
Thanks,
Richard.
2012-05-30 Richard Guenther <[email protected]>
PR tree-optimization/53081
* tree-data-ref.h (stores_zero_from_loop): Rename to ...
(stores_bytes_from_loop): ... this.
(stmt_with_adjacent_zero_store_dr_p): Rename to ...
(stmt_with_adjacent_byte_store_dr_p): ... this.
* tree-data-ref.c (stmt_with_adjacent_zero_store_dr_p): Rename to ...
(stmt_with_adjacent_byte_store_dr_p): ... this. Handle all kinds
of byte-sized stores.
(stores_zero_from_loop): Rename to ...
(stores_bytes_from_loop): ... this.
* tree-loop-distribution.c (generate_memset_zero): Rename to ...
(generate_memset): ... this. Handle all kinds of byte-sized
stores.
(generate_builtin): Adjust.
(can_generate_builtin): Likewise.
(tree_loop_distribution): Likewise.
Index: gcc/tree-data-ref.h
===================================================================
*** gcc/tree-data-ref.h (revision 188004)
--- gcc/tree-data-ref.h (working copy)
*************** index_in_loop_nest (int var, VEC (loop_p
*** 606,616 ****
}
void stores_from_loop (struct loop *, VEC (gimple, heap) **);
! void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
void remove_similar_memory_refs (VEC (gimple, heap) **);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
! bool stmt_with_adjacent_zero_store_dr_p (gimple);
/* Returns true when STRIDE is equal in absolute value to the size of
the unit type of TYPE. */
--- 606,616 ----
}
void stores_from_loop (struct loop *, VEC (gimple, heap) **);
! void stores_bytes_from_loop (struct loop *, VEC (gimple, heap) **);
void remove_similar_memory_refs (VEC (gimple, heap) **);
bool rdg_defs_used_in_other_loops_p (struct graph *, int);
bool have_similar_memory_accesses (gimple, gimple);
! bool stmt_with_adjacent_byte_store_dr_p (gimple);
/* Returns true when STRIDE is equal in absolute value to the size of
the unit type of TYPE. */
Index: gcc/tree-data-ref.c
===================================================================
*** gcc/tree-data-ref.c (revision 188004)
--- gcc/tree-data-ref.c (working copy)
*************** stores_from_loop (struct loop *loop, VEC
*** 5248,5259 ****
free (bbs);
}
! /* Returns true when the statement at STMT is of the form "A[i] = 0"
that contains a data reference on its LHS with a stride of the same
! size as its unit type. */
bool
! stmt_with_adjacent_zero_store_dr_p (gimple stmt)
{
tree lhs, rhs;
bool res;
--- 5248,5260 ----
free (bbs);
}
! /* Returns true when the statement at STMT is of the form "A[i] = x"
that contains a data reference on its LHS with a stride of the same
! size as its unit type that can be rewritten as a series of byte
! stores with the same value. */
bool
! stmt_with_adjacent_byte_store_dr_p (gimple stmt)
{
tree lhs, rhs;
bool res;
*************** stmt_with_adjacent_zero_store_dr_p (gimp
*** 5272,5278 ****
&& DECL_BIT_FIELD (TREE_OPERAND (lhs, 1)))
return false;
! if (!(integer_zerop (rhs) || real_zerop (rhs)))
return false;
dr = XCNEW (struct data_reference);
--- 5273,5286 ----
&& DECL_BIT_FIELD (TREE_OPERAND (lhs, 1)))
return false;
! if (!(integer_zerop (rhs)
! || integer_all_onesp (rhs)
! || real_zerop (rhs)
! || (TREE_CODE (rhs) == CONSTRUCTOR
! && !TREE_CLOBBER_P (rhs))
! || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
! && (TYPE_MODE (TREE_TYPE (lhs))
! == TYPE_MODE (unsigned_char_type_node)))))
return false;
dr = XCNEW (struct data_reference);
*************** stmt_with_adjacent_zero_store_dr_p (gimp
*** 5291,5297 ****
store to memory of the form "A[i] = 0". */
void
! stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
{
unsigned int i;
basic_block bb;
--- 5299,5305 ----
store to memory of the form "A[i] = 0". */
void
! stores_bytes_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
{
unsigned int i;
basic_block bb;
*************** stores_zero_from_loop (struct loop *loop
*** 5302,5308 ****
for (i = 0; i < loop->num_nodes; i++)
for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
if ((stmt = gsi_stmt (si))
! && stmt_with_adjacent_zero_store_dr_p (stmt))
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
free (bbs);
--- 5310,5316 ----
for (i = 0; i < loop->num_nodes; i++)
for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
if ((stmt = gsi_stmt (si))
! && stmt_with_adjacent_byte_store_dr_p (stmt))
VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
free (bbs);
Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c (revision 188004)
--- gcc/tree-loop-distribution.c (working copy)
*************** build_size_arg_loc (location_t loc, tree
*** 300,307 ****
/* Generate a call to memset. Return true when the operation succeeded. */
static void
! generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
! gimple_stmt_iterator bsi)
{
tree addr_base, nb_bytes;
bool res = false;
--- 300,307 ----
/* Generate a call to memset. Return true when the operation succeeded. */
static void
! generate_memset (gimple stmt, tree op0, tree nb_iter,
! gimple_stmt_iterator bsi)
{
tree addr_base, nb_bytes;
bool res = false;
*************** generate_memset_zero (gimple stmt, tree
*** 310,315 ****
--- 310,316 ----
tree mem, fn;
struct data_reference *dr = XCNEW (struct data_reference);
location_t loc = gimple_location (stmt);
+ tree val;
DR_STMT (dr) = stmt;
DR_REF (dr) = op0;
*************** generate_memset_zero (gimple stmt, tree
*** 334,346 ****
mem = force_gimple_operand (addr_base, &stmts, true, NULL);
gimple_seq_add_seq (&stmt_list, stmts);
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
! fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call);
gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS))
! fprintf (dump_file, "generated memset zero\n");
free_data_ref (dr);
}
--- 335,379 ----
mem = force_gimple_operand (addr_base, &stmts, true, NULL);
gimple_seq_add_seq (&stmt_list, stmts);
+ /* This exactly matches stmt_with_adjacent_byte_store_dr_p which detects
+ stores of zero or byte-size integer stores. */
+ val = gimple_assign_rhs1 (stmt);
+ if (integer_zerop (val)
+ || real_zerop (val)
+ || TREE_CODE (val) == CONSTRUCTOR)
+ val = integer_zero_node;
+ else if (integer_all_onesp (val))
+ val = build_int_cst (integer_type_node, -1);
+ else
+ {
+ if (TREE_CODE (val) == INTEGER_CST)
+ val = fold_convert (integer_type_node, val);
+ else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
+ {
+ gimple cstmt;
+ tree tem = create_tmp_reg (integer_type_node, NULL);
+ tem = make_ssa_name (tem, NULL);
+ cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
+ gimple_seq_add_stmt (&stmt_list, cstmt);
+ val = tem;
+ }
+ }
+
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
! fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call);
gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS))
! {
! fprintf (dump_file, "generated memset");
! if (integer_zerop (val))
! fprintf (dump_file, " zero\n");
! else if (integer_all_onesp (val))
! fprintf (dump_file, " minus one\n");
! else
! fprintf (dump_file, "\n");
! }
free_data_ref (dr);
}
*************** generate_builtin (struct loop *loop, bit
*** 386,399 ****
if (stmt_has_scalar_dependences_outside_loop (stmt))
goto end;
! if (is_gimple_assign (stmt)
&& !is_gimple_reg (gimple_assign_lhs (stmt)))
{
/* Don't generate the builtins when there are more than
one memory write. */
if (write != NULL)
goto end;
write = stmt;
if (bb == loop->latch)
nb_iter = number_of_latch_executions (loop);
--- 419,443 ----
if (stmt_has_scalar_dependences_outside_loop (stmt))
goto end;
! if (gimple_assign_single_p (stmt)
&& !is_gimple_reg (gimple_assign_lhs (stmt)))
{
+ tree rhs;
+
/* Don't generate the builtins when there are more than
one memory write. */
if (write != NULL)
goto end;
+ /* If the store is from a non-constant, verify the value
+ is defined outside of the loop. */
+ rhs = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (rhs) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (rhs)
+ && flow_bb_inside_loop_p
+ (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
+ goto end;
+
write = stmt;
if (bb == loop->latch)
nb_iter = number_of_latch_executions (loop);
*************** generate_builtin (struct loop *loop, bit
*** 401,412 ****
}
}
! if (!stmt_with_adjacent_zero_store_dr_p (write))
goto end;
/* The new statements will be placed before LOOP. */
bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
! generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
res = true;
/* If this is the last partition for which we generate code, we have
--- 445,456 ----
}
}
! if (!stmt_with_adjacent_byte_store_dr_p (write))
goto end;
/* The new statements will be placed before LOOP. */
bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
! generate_memset (write, gimple_assign_lhs (write), nb_iter, bsi);
res = true;
/* If this is the last partition for which we generate code, we have
*************** can_generate_builtin (struct graph *rdg,
*** 825,831 ****
gimple stmt = RDG_STMT (rdg, i);
nb_writes++;
if (!gimple_has_volatile_ops (stmt)
! && stmt_with_adjacent_zero_store_dr_p (stmt))
stores_zero++;
}
--- 869,875 ----
gimple stmt = RDG_STMT (rdg, i);
nb_writes++;
if (!gimple_has_volatile_ops (stmt)
! && stmt_with_adjacent_byte_store_dr_p (stmt))
stores_zero++;
}
*************** tree_loop_distribution (void)
*** 1266,1272 ****
/* With the following working list, we're asking
distribute_loop to separate from the rest of the loop the
stores of the form "A[i] = 0". */
! stores_zero_from_loop (loop, &work_list);
/* Do nothing if there are no patterns to be distributed. */
if (VEC_length (gimple, work_list) > 0)
--- 1310,1316 ----
/* With the following working list, we're asking
distribute_loop to separate from the rest of the loop the
stores of the form "A[i] = 0". */
! stores_bytes_from_loop (loop, &work_list);
/* Do nothing if there are no patterns to be distributed. */
if (VEC_length (gimple, work_list) > 0)
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c (revision 0)
***************
*** 0 ****
--- 1,49 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+ struct Foo
+ {
+ char a;
+ };
+
+ struct Foo x[256];
+
+ void foo()
+ {
+ int i;
+ for (i = 0; i < 256; ++i)
+ x[i] = (struct Foo){};
+ }
+
+ void bar()
+ {
+ int i;
+ for (i = 0; i < 256; ++i)
+ x[i].a = 1;
+ }
+
+ void foobar(unsigned char c)
+ {
+ int i;
+ for (i = 0; i < 256; ++i)
+ x[i].a = c;
+ }
+
+ struct Baz
+ {
+ short a;
+ };
+
+ struct Baz y[256];
+
+ void baz()
+ {
+ int i;
+ for (i = 0; i < 256; ++i)
+ y[i].a = -1;
+ }
+
+ /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
+ /* { dg-final { scan-tree-dump-times "generated memset minus one" 1 "ldist" } } */
+ /* { dg-final { scan-tree-dump-times "generated memset" 4 "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */