2017-11-16  Jakub Jelinek  <ja...@redhat.com>
PR tree-optimization/78821
* gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up
if base is TARGET_MEM_REF. If base is not MEM_REF, set base_addr
to the address of the base rather than the base itself.
(find_bswap_or_nop_1): Just use pointer comparison for vuse check.
(find_bswap_or_nop_finalize): New function.
(find_bswap_or_nop): Use it.
(bswap_replace): Return a tree rather than bool, change first
argument from gimple * to gimple_stmt_iterator, allow inserting
into an empty sequence, and allow ins_stmt to be NULL, in which
case all stmts are emitted into gsi. Fix up MEM_REF address
gimplification.
(pass_optimize_bswap::execute): Adjust bswap_replace caller.
Formatting fix.
(struct store_immediate_info): Add N and INS_STMT non-static
data members.
(store_immediate_info::store_immediate_info): Initialize them
from newly added ctor args.
(merged_store_group::apply_stores): Formatting fixes. Sort by
bitpos at the end.
(stmts_may_clobber_ref_p): For stores, also call
refs_anti_dependent_p.
(gather_bswap_load_refs): New function.
(imm_store_chain_info::try_coalesce_bswap): New method.
(imm_store_chain_info::coalesce_immediate_stores): Use it.
(split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially.
(imm_store_chain_info::output_merged_store): Fail if the estimated
number of new stmts is greater than or equal to the old count.
Handle LROTATE_EXPR and NOP_EXPR rhs_code.
(pass_store_merging::process_store): Compute n and ins_stmt; if
ins_stmt is non-NULL and the store rhs is otherwise invalid, use
LROTATE_EXPR rhs_code. Pass n and ins_stmt to the
store_immediate_info ctor.
(pass_store_merging::execute): Calculate dominators.
* gcc.dg/store_merging_16.c: New test.
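
For reference, here is a minimal sketch of the kind of sequence the new
try_coalesce_bswap path is meant to catch (an illustration in the spirit
of the new test; the actual contents of gcc.dg/store_merging_16.c are not
shown here). On a little-endian target the four adjacent byte stores
below write X with its bytes reversed, so store merging can now rewrite
them into a single 32-bit store of __builtin_bswap32 (x):

/* Illustration only, not the committed testcase.  */
void
store_be32 (unsigned char *p, unsigned int x)
{
  p[0] = x >> 24;
  p[1] = x >> 16;
  p[2] = x >> 8;
  p[3] = x;
}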
--- gcc/gimple-ssa-store-merging.c.jj 2017-11-16 10:45:09.239185205 +0100
+++ gcc/gimple-ssa-store-merging.c 2017-11-16 15:34:08.560080214 +0100
@@ -369,7 +369,10 @@ find_bswap_or_nop_load (gimple *stmt, tr
base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
&unsignedp, &reversep, &volatilep);
- if (TREE_CODE (base_addr) == MEM_REF)
+ if (TREE_CODE (base_addr) == TARGET_MEM_REF)
+ /* Do not rewrite TARGET_MEM_REF. */
+ return false;
+ else if (TREE_CODE (base_addr) == MEM_REF)
{
offset_int bit_offset = 0;
tree off = TREE_OPERAND (base_addr, 1);
@@ -401,6 +404,8 @@ find_bswap_or_nop_load (gimple *stmt, tr
bitpos += bit_offset.to_shwi ();
}
+ else
+ base_addr = build_fold_addr_expr (base_addr);
if (bitpos % BITS_PER_UNIT)
return false;
@@ -743,8 +748,7 @@ find_bswap_or_nop_1 (gimple *stmt, struc
if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
return NULL;
- if (!n1.vuse != !n2.vuse
- || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+ if (n1.vuse != n2.vuse)
return NULL;
source_stmt
@@ -765,39 +769,21 @@ find_bswap_or_nop_1 (gimple *stmt, struc
return NULL;
}
-/* Check if STMT completes a bswap implementation or a read in a given
- endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
- accordingly. It also sets N to represent the kind of operations
- performed: size of the resulting expression and whether it works on
- a memory source, and if so alias-set and vuse. At last, the
- function returns a stmt whose rhs's first tree is the source
- expression. */
+/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
+ *CMPXCHG, *CMPNOP and adjust *N. */
-gimple *
-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+void
+find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
+ uint64_t *cmpnop)
{
unsigned rsize;
uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
- to have a full byte swap. The number is shifted to the right
- according to the size of the symbolic number before using it. */
- uint64_t cmpxchg = CMPXCHG;
- uint64_t cmpnop = CMPNOP;
-
- gimple *ins_stmt;
- int limit;
- /* The last parameter determines the depth search limit. It usually
- correlates directly to the number n of bytes to be touched. We
- increase that number by log2(n) + 1 here in order to also
- cover signed -> unsigned conversions of the src operand as can be seen
- in libgcc, and for initial shift/and operation of the src operand. */
- limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
- limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
- ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
-
- if (!ins_stmt)
- return NULL;
+ /* The number which the find_bswap_or_nop_1 result should match in order
+ to have a full byte swap. The number is shifted to the right
+ according to the size of the symbolic number before using it. */
+ *cmpxchg = CMPXCHG;
+ *cmpnop = CMPNOP;
/* Find real size of result (highest non-zero byte). */
if (n->base_addr)
@@ -810,8 +796,8 @@ find_bswap_or_nop (gimple *stmt, struct
if (n->range < (int) sizeof (int64_t))
{
mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
- cmpnop &= mask;
+ *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ *cmpnop &= mask;
}
/* Zero out the bits corresponding to unused bytes in the result of the
@@ -821,18 +807,47 @@ find_bswap_or_nop (gimple *stmt, struct
if (BYTES_BIG_ENDIAN)
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg &= mask;
- cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+ *cmpxchg &= mask;
+ *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
}
else
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
- cmpnop &= mask;
+ *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+ *cmpnop &= mask;
}
n->range = rsize;
}
+ n->range *= BITS_PER_UNIT;
+}
+
+/* Check if STMT completes a bswap implementation or a read in a given
+ endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
+ accordingly. It also sets N to represent the kind of operations
+ performed: size of the resulting expression and whether it works on
+ a memory source, and if so alias-set and vuse. At last, the
+ function returns a stmt whose rhs's first tree is the source
+ expression. */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+ /* The last parameter determines the depth search limit. It usually
+ correlates directly to the number n of bytes to be touched. We
+ increase that number by log2(n) + 1 here in order to also
+ cover signed -> unsigned conversions of the src operand as can be seen
+ in libgcc, and for initial shift/and operation of the src operand. */
+ int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+ limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+ gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+ if (!ins_stmt)
+ return NULL;
+
+ uint64_t cmpxchg, cmpnop;
+ find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
+
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as target architecture. */
@@ -847,7 +862,6 @@ find_bswap_or_nop (gimple *stmt, struct
if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
return NULL;
- n->range *= BITS_PER_UNIT;
return ins_stmt;
}
@@ -882,68 +896,89 @@ public:
}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
- of CUR_STMT by an equivalent bswap, load or load + bswap expression.
+ of gsi_stmt (GSI) (or, if that is NULL, insert instead of replace) by an
+ equivalent bswap, load or load + bswap expression.
Which of these alternatives replace the rhs is given by N->base_addr (non
null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
load to perform are also given in N while the builtin bswap invoke is given
- in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the
- load statements involved to construct the rhs in CUR_STMT and N->range gives
- the size of the rhs expression for maintaining some statistics.
-
- Note that if the replacement involve a load, CUR_STMT is moved just after
- SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT
- changing of basic block. */
+ in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
+ load statements involved to construct the rhs in gsi_stmt (GSI) and
+ N->range gives the size of the rhs expression for maintaining some
+ statistics.
+
+ Note that if the replacement involves a load and gsi_stmt (GSI) is
+ non-NULL, that stmt is moved just after INS_STMT to do the load with the
+ same VUSE, which can move gsi_stmt (GSI) to a different basic block. */
-bool
-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
+tree
+bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
bool bswap)
{
- gimple_stmt_iterator gsi;
- tree src, tmp, tgt;
+ tree src, tmp, tgt = NULL_TREE;
gimple *bswap_stmt;
- gsi = gsi_for_stmt (cur_stmt);
+ gimple *cur_stmt = gsi_stmt (gsi);
src = n->src;
- tgt = gimple_assign_lhs (cur_stmt);
+ if (cur_stmt)
+ tgt = gimple_assign_lhs (cur_stmt);
/* Need to load the value from memory first. */
if (n->base_addr)
{
- gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
+ gimple_stmt_iterator gsi_ins = gsi;
+ if (ins_stmt)
+ gsi_ins = gsi_for_stmt (ins_stmt);
tree addr_expr, addr_tmp, val_expr, val_tmp;
tree load_offset_ptr, aligned_load_type;
- gimple *addr_stmt, *load_stmt;
- unsigned align;
+ gimple *load_stmt;
+ unsigned align = get_object_alignment (src);
HOST_WIDE_INT load_offset = 0;
- basic_block ins_bb, cur_bb;
- ins_bb = gimple_bb (ins_stmt);
- cur_bb = gimple_bb (cur_stmt);
- if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
- return false;
-
- align = get_object_alignment (src);
-
- /* Move cur_stmt just before one of the load of the original
- to ensure it has the same VUSE. See PR61517 for what could
- go wrong. */
- if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
- reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
- gsi_move_before (&gsi, &gsi_ins);
- gsi = gsi_for_stmt (cur_stmt);
+ if (cur_stmt)
+ {
+ basic_block ins_bb = gimple_bb (ins_stmt);
+ basic_block cur_bb = gimple_bb (cur_stmt);
+ if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+ return NULL_TREE;
+
+ /* Move cur_stmt just before one of the loads of the original
+ to ensure it has the same VUSE. See PR61517 for what could
+ go wrong. */
+ if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+ reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+ gsi_move_before (&gsi, &gsi_ins);
+ gsi = gsi_for_stmt (cur_stmt);
+ }
+ else
+ gsi = gsi_ins;
/* Compute address to load from and cast according to the size
of the load. */
- addr_expr = build_fold_addr_expr (unshare_expr (src));
+ addr_expr = build_fold_addr_expr (src);
if (is_gimple_mem_ref_addr (addr_expr))
- addr_tmp = addr_expr;
+ addr_tmp = unshare_expr (addr_expr);
else
{
- addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
- "load_src");
- addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
- gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+ addr_tmp = unshare_expr (n->base_addr);
+ if (!is_gimple_mem_ref_addr (addr_tmp))
+ addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
+ is_gimple_mem_ref_addr,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ load_offset = n->bytepos;
+ if (n->offset)
+ {
+ tree off
+ = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ gimple *stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
+ POINTER_PLUS_EXPR, addr_tmp, off);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ addr_tmp = gimple_assign_lhs (stmt);
+ }
}
/* Perform the load. */
@@ -967,7 +1002,7 @@ bswap_replace (gimple *cur_stmt, gimple
}
/* Convert the result of load if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
{
val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
"load_dst");
@@ -975,13 +1010,21 @@ bswap_replace (gimple *cur_stmt, gimple
gimple_set_vuse (load_stmt, n->vuse);
gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+ update_stmt (cur_stmt);
}
- else
+ else if (cur_stmt)
{
gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
gimple_set_vuse (cur_stmt, n->vuse);
+ update_stmt (cur_stmt);
+ }
+ else
+ {
+ tgt = make_ssa_name (load_type);
+ cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
}
- update_stmt (cur_stmt);
if (dump_file)
{
@@ -990,7 +1033,7 @@ bswap_replace (gimple *cur_stmt, gimple
(int) n->range);
print_gimple_stmt (dump_file, cur_stmt, 0);
}
- return true;
+ return tgt;
}
else
{
@@ -1003,15 +1046,17 @@ bswap_replace (gimple *cur_stmt, gimple
}
else if (!bswap)
{
- gimple *g;
- if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+ gimple *g = NULL;
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
{
if (!is_gimple_val (src))
- return false;
+ return NULL_TREE;
g = gimple_build_assign (tgt, NOP_EXPR, src);
}
- else
+ else if (cur_stmt)
g = gimple_build_assign (tgt, src);
+ else
+ tgt = src;
if (n->range == 16)
nop_stats.found_16bit++;
else if (n->range == 32)
@@ -1026,10 +1071,17 @@ bswap_replace (gimple *cur_stmt, gimple
fprintf (dump_file,
"%d bit reshuffle in target endianness found at: ",
(int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, 0);
+ fprintf (dump_file, "\n");
+ }
}
- gsi_replace (&gsi, g, true);
- return true;
+ if (cur_stmt)
+ gsi_replace (&gsi, g, true);
+ return tgt;
}
else if (TREE_CODE (src) == BIT_FIELD_REF)
src = TREE_OPERAND (src, 0);
@@ -1069,6 +1121,8 @@ bswap_replace (gimple *cur_stmt, gimple
else
bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+ if (tgt == NULL_TREE)
+ tgt = make_ssa_name (bswap_type);
tmp = tgt;
/* Convert the result if necessary. */
@@ -1087,12 +1141,23 @@ bswap_replace (gimple *cur_stmt, gimple
{
fprintf (dump_file, "%d bit bswap implementation found at: ",
(int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, 0);
+ fprintf (dump_file, "\n");
+ }
}
- gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
- return true;
+ if (cur_stmt)
+ {
+ gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ }
+ else
+ gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
+ return tgt;
}
/* Find manual byte swap implementations as well as load in a given
@@ -1141,7 +1206,7 @@ pass_optimize_bswap::execute (function *
inserted smaller bswap replacements as sub-patterns, the wider
variant wouldn't be detected. */
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
- {
+ {
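
As a worked note on the cmpxchg/cmpnop pair that the new
find_bswap_or_nop_finalize hands back to its callers: each source byte is
tracked as an 8-bit marker (BITS_PER_MARKER is 8 in this file), with
CMPXCHG and CMPNOP defined earlier in gimple-ssa-store-merging.c as
0x0102030405060708 and 0x0807060504030201 respectively. The standalone
sketch below (illustration only, not part of the patch) shows what the
"n->range < (int) sizeof (int64_t)" branch quoted above computes for a
4-byte access:

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint64_t cmpxchg = (uint64_t) 0x01020304 << 32 | 0x05060708;
  uint64_t cmpnop = (uint64_t) 0x08070605 << 32 | 0x04030201;
  int range = 4;	/* n->range: a 4-byte access.  */
  uint64_t mask = ((uint64_t) 1 << (range * 8)) - 1;

  /* The narrowing done when n->range < sizeof (int64_t): shift
     cmpxchg right, mask cmpnop down to the low range bytes.  */
  cmpxchg >>= (8 - range) * 8;
  cmpnop &= mask;

  /* A symbolic number equal to cmpxchg means the four bytes were
     fully reversed (a bswap); equal to cmpnop means they are in
     native order (a plain load/copy).  */
  assert (cmpxchg == 0x01020304);
  assert (cmpnop == 0x04030201);
  return 0;
}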