mult (PR 88064)

Richard Biener Wed, 05 Dec 2018 02:02:55 -0800

On Wed, Dec 5, 2018 at 9:39 AM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> As Jakub pointed out, if we narrow a plus, minus or mult operation based
> on the number of bits that consumers need, we have to convert a signed
> operation to an unsigned one in order to avoid new undefined behaviour.
> This patch does that and generalises vect_convert_input and
> vect_recog_over_widening_pattern to cope with the extra casts.
> (The changes to both functions are covered by existing tests.)
>
> Tested on x86_64-linux-gnu, aarch64-linux-gnu and aarch64_be-elf.
> OK to install?


OK.

Richard.

> Richard
>
>
> 2018-12-05  Richard Sandiford  <richard.sandif...@arm.com>
>
> gcc/
>         PR tree-optimization/88064
>         * tree-vect-patterns.c (vect_convert_input): Convert the result of
>         an existing cast if it has the right width but the wrong sign.
>         Do not test the signedness of the required result when
>         considering whether to split an existing cast; instead split to
>         a type with the same signedness as the source of the cast, then
>         convert it to the opposite signedness where necessary.
>         (vect_recog_over_widening_pattern): Handle sign changes between
>         the final PLUS_EXPR and the RSHIFT_EXPR.
>         (vect_recog_average_pattern): Use an unsigned operation when
>         truncating an addition, subtraction or multiplication.  Cast the
>         result back to the "real" signedness before promoting.
>
> gcc/testsuite/
>         PR tree-optimization/88064
>         * gcc.dg/vect/vect-over-widen-23.c: New test.
>
> Index: gcc/tree-vect-patterns.c
> ===================================================================
> --- gcc/tree-vect-patterns.c    2018-12-05 08:33:40.658922796 +0000
> +++ gcc/tree-vect-patterns.c    2018-12-05 08:33:55.130797055 +0000
> @@ -720,34 +720,60 @@ vect_convert_input (stmt_vec_info stmt_i
>    if (TREE_CODE (unprom->op) == INTEGER_CST)
>      return wide_int_to_tree (type, wi::to_widest (unprom->op));
>
> -  /* See if we can reuse an existing result.  */
> +  tree input = unprom->op;
>    if (unprom->caster)
>      {
>        tree lhs = gimple_get_lhs (unprom->caster->stmt);
> -      if (types_compatible_p (TREE_TYPE (lhs), type))
> -       return lhs;
> +      tree lhs_type = TREE_TYPE (lhs);
> +
> +      /* If the result of the existing cast is the right width, use it
> +        instead of the source of the cast.  */
> +      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
> +       input = lhs;
> +      /* If the precision we want is between the source and result
> +        precisions of the existing cast, try splitting the cast into
> +        two and tapping into a mid-way point.  */
> +      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
> +              && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
> +       {
> +         /* In order to preserve the semantics of the original cast,
> +            give the mid-way point the same signedness as the input value.
> +
> +            It would be possible to use a signed type here instead if
> +            TYPE is signed and UNPROM->TYPE is unsigned, but that would
> +            make the sign of the midtype sensitive to the order in
> +            which we process the statements, since the signedness of
> +            TYPE is the signedness required by just one of possibly
> +            many users.  Also, unsigned promotions are usually as cheap
> +            as or cheaper than signed ones, so it's better to keep an
> +            unsigned promotion.  */
> +         tree midtype = build_nonstandard_integer_type
> +           (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
> +         tree vec_midtype = get_vectype_for_scalar_type (midtype);
> +         if (vec_midtype)
> +           {
> +             input = vect_recog_temp_ssa_var (midtype, NULL);
> +             gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
> +                                                      unprom->op);
> +             if (!vect_split_statement (unprom->caster, input, new_stmt,
> +                                        vec_midtype))
> +               append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
> +           }
> +       }
> +
> +      /* See if we can reuse an existing result.  */
> +      if (types_compatible_p (type, TREE_TYPE (input)))
> +       return input;
>      }
>
>    /* We need a new conversion statement.  */
>    tree new_op = vect_recog_temp_ssa_var (type, NULL);
> -  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
> -
> -  /* If the operation is the input to a vectorizable cast, try splitting
> -     that cast into two, taking the required result as a mid-way point.  */
> -  if (unprom->caster)
> -    {
> -      tree lhs = gimple_get_lhs (unprom->caster->stmt);
> -      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
> -         && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
> -         && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
> -         && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
> -       return new_op;
> -    }
> +  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
>
>    /* If OP is an external value, see if we can insert the new statement
>       on an incoming edge.  */
> -  if (unprom->dt == vect_external_def)
> -    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
> +  if (input == unprom->op && unprom->dt == vect_external_def)
> +    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
>        {
>         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
>         gcc_assert (!new_bb);
> @@ -1644,28 +1670,37 @@ vect_recog_over_widening_pattern (stmt_v
>    bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
>    tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
>
> +  /* If we're truncating an operation, we need to make sure that we
> +     don't introduce new undefined overflow.  The codes tested here are
> +     a subset of those accepted by vect_truncatable_operation_p.  */
> +  tree op_type = new_type;
> +  if (TYPE_OVERFLOW_UNDEFINED (new_type)
> +      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
> +    op_type = build_nonstandard_integer_type (new_precision, true);
> +
>    /* We specifically don't check here whether the target supports the
>       new operation, since it might be something that a later pattern
>       wants to rewrite anyway.  If targets have a minimum element size
>       for some optabs, we should pattern-match smaller ops to larger ops
>       where beneficial.  */
>    tree new_vectype = get_vectype_for_scalar_type (new_type);
> -  if (!new_vectype)
> +  tree op_vectype = get_vectype_for_scalar_type (op_type);
> +  if (!new_vectype || !op_vectype)
>      return NULL;
>
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
>                      type, new_type);
>
> -  /* Calculate the rhs operands for an operation on NEW_TYPE.  */
> +  /* Calculate the rhs operands for an operation on OP_TYPE.  */
>    tree ops[3] = {};
>    for (unsigned int i = 1; i < first_op; ++i)
>      ops[i - 1] = gimple_op (last_stmt, i);
>    vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
> -                      new_type, &unprom[0], new_vectype);
> +                      op_type, &unprom[0], op_vectype);
>
> -  /* Use the operation to produce a result of type NEW_TYPE.  */
> -  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
> +  /* Use the operation to produce a result of type OP_TYPE.  */
> +  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
>    gimple *pattern_stmt = gimple_build_assign (new_var, code,
>                                               ops[0], ops[1], ops[2]);
>    gimple_set_location (pattern_stmt, gimple_location (last_stmt));
> @@ -1674,6 +1709,13 @@ vect_recog_over_widening_pattern (stmt_v
>      dump_printf_loc (MSG_NOTE, vect_location,
>                      "created pattern stmt: %G", pattern_stmt);
>
> +  /* Convert back to the original signedness, if OP_TYPE is different
> +     from NEW_TYPE.  */
> +  if (op_type != new_type)
> +    pattern_stmt = vect_convert_output (last_stmt_info, new_type,
> +                                       pattern_stmt, op_vectype);
> +
> +  /* Promote the result to the original type.  */
>    pattern_stmt = vect_convert_output (last_stmt_info, type,
>                                       pattern_stmt, new_vectype);
>
> @@ -1719,8 +1761,16 @@ vect_recog_average_pattern (stmt_vec_inf
>    if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
>      return NULL;
>
> -  /* Get the definition of the shift input.  */
> +  /* Look through any change in sign on the shift input.  */
>    tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
> +  vect_unpromoted_value unprom_plus;
> +  rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
> +                                                    &unprom_plus);
> +  if (!rshift_rhs
> +      || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
> +    return NULL;
> +
> +  /* Get the definition of the shift input.  */
>    stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
>    if (!plus_stmt_info)
>      return NULL;
> Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
> ===================================================================
> --- /dev/null   2018-11-29 13:15:04.463550658 +0000
> +++ gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c      2018-12-05 
> 08:33:55.130797055 +0000
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp" }
> +/* { dg-require-effective-target vect_int } */
> +
> +#include "tree-vect.h"
> +
> +#if VECTOR_BITS > 128
> +#define N (VECTOR_BITS / 2)
> +#else
> +#define N 64
> +#endif
> +
> +int a[N], b[N], c[N];
> +
> +void
> +foo ()
> +{
> +  int i;
> +  for (i = 0; i < N; i++)
> +    {
> +      long long d = a[i];
> +      long long e = b[i];
> +      d += e;
> +      c[i] = d;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: 
> detected" 2 "vect" { target vect_widen_shift } } } */
> +/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: 
> detected:[^\n]* \+} "vect" } } */
> +/* { dg-final { scan-tree-dump {VIEW_CONVERT_EXPR<vector[^ ]* unsigned} 
> "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

Re: Use unsigned arithmetic for demoted vector plus/minus/mult (PR 88064)

Reply via email to