On Thu, 17 Dec 2020, Jakub Jelinek wrote:

> On Wed, Dec 16, 2020 at 09:29:31AM +0100, Richard Biener wrote:
> > I think it probably makes sense to have some helper split out that
> > collects & classifies vector constructor components we can use from
> > both forwprop (where matching the V_C_E from integer could be done
> > as well IMHO) and bswap (when a permute is involved) and store-merging.
> 
> I've tried to add such helper, but handling over just analysis and letting
> each pass handle it differently seems complicated given the limitations of
> the bswap infrastructure.
> 
> So, this patch just hooks the optimization also into store-merging so that
> the original testcase from the PR can be fixed.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK, but ...

> Note, I have no idea why the bswap code needs TODO_update_ssa if it changed
> things, for the vuses it copies them from the surrounding vuses, which looks
> correct to me.  Perhaps because it uses force_gimple_operand_gsi* in a few
> spots in bswap_replace?  Confused...

.. that shouldn't cause updating SSA to be necessary.  Maybe it at some
point did not update virtual operands appropriately.

Richard.

> 2020-12-17  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/96239
>       * gimple-ssa-store-merging.c (maybe_optimize_vector_constructor): New
>       function.
>       (get_status_for_store_merging): Don't return BB_INVALID for blocks
>       with potential bswap optimizable CONSTRUCTORs.
>       (pass_store_merging::execute): Optimize vector CONSTRUCTORs with bswap
>       if possible.
> 
>       * gcc.dg/tree-ssa/pr96239.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2020-12-16 13:07:51.729733816 +0100
> +++ gcc/gimple-ssa-store-merging.c    2020-12-16 16:02:06.238868137 +0100
> @@ -1255,6 +1255,75 @@ bswap_replace (gimple_stmt_iterator gsi,
>    return tgt;
>  }
>  
> +/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
> +   using bswap optimizations.  CDI_DOMINATORS need to be
> +   computed on entry.  Return true if it has been optimized and
> +   TODO_update_ssa is needed.  */
> +
> +static bool
> +maybe_optimize_vector_constructor (gimple *cur_stmt)
> +{
> +  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
> +  struct symbolic_number n;
> +  bool bswap;
> +
> +  gcc_assert (is_gimple_assign (cur_stmt)
> +           && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
> +
> +  tree rhs = gimple_assign_rhs1 (cur_stmt);
> +  if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
> +      || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
> +      || gimple_assign_lhs (cur_stmt) == NULL_TREE)
> +    return false;
> +
> +  HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
> +  switch (sz)
> +    {
> +    case 16:
> +      load_type = bswap_type = uint16_type_node;
> +      break;
> +    case 32:
> +      if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
> +       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
> +     {
> +       load_type = uint32_type_node;
> +       fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
> +       bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
> +     }
> +      else
> +     return false;
> +      break;
> +    case 64:
> +      if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
> +       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
> +           || (word_mode == SImode
> +               && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
> +               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
> +     {
> +       load_type = uint64_type_node;
> +       fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
> +       bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
> +     }
> +      else
> +     return false;
> +      break;
> +    default:
> +      return false;
> +    }
> +
> +  gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
> +  if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz)
> +    return false;
> +
> +  if (bswap && !fndecl && n.range != 16)
> +    return false;
> +
> +  memset (&nop_stats, 0, sizeof (nop_stats));
> +  memset (&bswap_stats, 0, sizeof (bswap_stats));
> +  return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
> +                     bswap_type, load_type, &n, bswap) != NULL_TREE;
> +}
> +
>  /* Find manual byte swap implementations as well as load in a given
>     endianness. Byte swaps are turned into a bswap builtin invokation
>     while endian loads are converted to bswap builtin invokation or
> @@ -5126,6 +5195,7 @@ static enum basic_block_status
>  get_status_for_store_merging (basic_block bb)
>  {
>    unsigned int num_statements = 0;
> +  unsigned int num_constructors = 0;
>    gimple_stmt_iterator gsi;
>    edge e;
>  
> @@ -5138,9 +5208,27 @@ get_status_for_store_merging (basic_bloc
>  
>        if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
>       break;
> +
> +      if (is_gimple_assign (stmt)
> +       && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
> +     {
> +       tree rhs = gimple_assign_rhs1 (stmt);
> +       if (VECTOR_TYPE_P (TREE_TYPE (rhs))
> +           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
> +           && gimple_assign_lhs (stmt) != NULL_TREE)
> +         {
> +           HOST_WIDE_INT sz
> +             = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
> +           if (sz == 16 || sz == 32 || sz == 64)
> +             {
> +               num_constructors = 1;
> +               break;
> +             }
> +         }
> +     }
>      }
>  
> -  if (num_statements == 0)
> +  if (num_statements == 0 && num_constructors == 0)
>      return BB_INVALID;
>  
>    if (cfun->can_throw_non_call_exceptions && cfun->eh
> @@ -5149,7 +5237,7 @@ get_status_for_store_merging (basic_bloc
>        && e->dest == bb->next_bb)
>      return BB_EXTENDED_VALID;
>  
> -  return num_statements >= 2 ? BB_VALID : BB_INVALID;
> +  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
>  }
>  
>  /* Entry point for the pass.  Go over each basic block recording chains of
> @@ -5163,6 +5251,7 @@ pass_store_merging::execute (function *f
>    basic_block bb;
>    hash_set<gimple *> orig_stmts;
>    bool changed = false, open_chains = false;
> +  unsigned todo = 0;
>  
>    /* If the function can throw and catch non-call exceptions, we'll be trying
>       to merge stores across different basic blocks so we need to first 
> unsplit
> @@ -5189,9 +5278,10 @@ pass_store_merging::execute (function *f
>        if (dump_file && (dump_flags & TDF_DETAILS))
>       fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
>  
> -      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
>       {
>         gimple *stmt = gsi_stmt (gsi);
> +       gsi_next (&gsi);
>  
>         if (is_gimple_debug (stmt))
>           continue;
> @@ -5207,6 +5297,14 @@ pass_store_merging::execute (function *f
>             continue;
>           }
>  
> +       if (is_gimple_assign (stmt)
> +           && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
> +           && maybe_optimize_vector_constructor (stmt))
> +         {
> +           todo = TODO_update_ssa;
> +           continue;
> +         }
> +
>         if (store_valid_for_store_merging_p (stmt))
>           changed |= process_store (stmt);
>         else
> @@ -5230,10 +5328,10 @@ pass_store_merging::execute (function *f
>    if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
>      {
>        free_dominance_info (CDI_DOMINATORS);
> -      return TODO_cleanup_cfg;
> +      return TODO_cleanup_cfg | todo;
>      }
>  
> -  return 0;
> +  return todo;
>  }
>  
>  } // anon namespace
> --- gcc/testsuite/gcc.dg/tree-ssa/pr96239.c.jj        2020-12-16 
> 16:10:30.013256862 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr96239.c   2020-12-16 16:11:08.802822537 
> +0100
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/96239 */
> +/* { dg-do compile { target { ilp32 || lp64 } } } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump " r>> 8;" "optimized" { target bswap } } } */
> +
> +union U { unsigned char c[2]; unsigned short s; };
> +
> +unsigned short
> +foo (unsigned short x)
> +{
> +  union U u;
> +  u.s = x;
> +  unsigned char v = u.c[0];
> +  unsigned char w = u.c[1];
> +  u.c[0] = w;
> +  u.c[1] = v;
> +  return u.s;
> +}
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to