On Thu, 19 Jan 2023, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the simplify_rotate comment, for e.g.
>    ((T) ((T2) X << (Y & (B - 1)))) | ((T) ((T2) X >> ((-Y) & (B - 1))))
> we already emit
>    X r<< (Y & (B - 1))
> as replacement.  This PR is about the
>    ((T) ((T2) X << Y)) OP ((T) ((T2) X >> (B - Y)))
>    ((T) ((T2) X << (int) Y)) OP ((T) ((T2) X >> (int) (B - Y)))
> forms if T2 is wider than T.  Unlike e.g.
>    (X << Y) OP (X >> (B - Y))
> which is valid just for Y in [1, B - 1], the above 2 forms are actually
> valid and do the rotates for Y in [0, B] - for Y 0 the X value is preserved
> by the left shift and right logical shift by B adds just zeros (but because
> the shift is in wider precision B is still valid shift count), while for
> Y equal to B X is preserved through the latter shift and the former adds
> just zeros.
> Now, it is unclear if we in the middle-end treat rotates with rotate count
> equal or larger than precision as UB or not, unlike shifts there are less
> reasons to do so, but e.g. expansion of X r<< Y if there is no rotate optab
> for the mode is emitted as (X << Y) | (((unsigned) X) >> ((-Y) & (B - 1)))
> and so with UB on Y == B.
> 
> The following patch does multiple things:
> 1) for the above 2, asks the ranger if Y could be equal to B and if so,
>    instead of using X r<< Y uses X r<< (Y & (B - 1))
> 2) for the
>    ((T) ((T2) X << Y)) | ((T) ((T2) X >> ((-Y) & (B - 1))))
>    ((T) ((T2) X << (int) Y)) | ((T) ((T2) X >> (int) ((-Y) & (B - 1))))
>    forms that were fixed 2 days ago it only punts if Y might be in the
>    [B,B2-1] range but isn't known to be in the
>    [0,B][2*B,2*B][3*B,3*B]... range.  Because for Y which is a multiple
>    of B but smaller than B2 it acts as a rotate too, left shift provides
>    0 and (-Y) & (B - 1) is 0 and so preserves X.  Though, for the cases
>    where Y is not known to be in [0,B-1] the patch also uses
>    X r<< (Y & (B - 1)) rather than X r<< Y
> 3) as discussed with Aldy, instead of using global ranger it uses a pass
>    specific copy but lazily created on first simplify_rotate that needs it;
>    this e.g. handles rotate inside of if body where the guarding condition
>    limits the shift count to some range which will not work with the
>    global ranger (unless there is some SSA_NAME to attach the range to).
> 
> Note, e.g. on x86 X r<< (Y & (B - 1)) and X r<< Y actually emit the
> same assembly because rotates work the same even for larger rotate counts,
> but that is handled only during combine.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks
Richard.

> 2023-01-19  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/108440
>       * tree-ssa-forwprop.cc: Include gimple-range.h.
>       (simplify_rotate): For the forms with T2 wider than T and shift counts 
> of
>       Y and B - Y add & (B - 1) masking for the rotate count if Y could be 
> equal
>       to B.  For the forms with T2 wider than T and shift counts of
>       Y and (-Y) & (B - 1), don't punt if range could be [B, B2], but only if
>       range doesn't guarantee Y < B or Y = N * B.  If range doesn't guarantee
>       Y < B, also add & (B - 1) masking for the rotate count.  Use lazily 
> created
>       pass specific ranger instead of get_global_range_query.
>       (pass_forwprop::execute): Disable that ranger at the end of pass if it 
> has
>       been created.
> 
>       * c-c++-common/rotate-10.c: New test.
>       * c-c++-common/rotate-11.c: New test.
> 
> --- gcc/tree-ssa-forwprop.cc.jj       2023-01-17 12:14:15.845088330 +0100
> +++ gcc/tree-ssa-forwprop.cc  2023-01-18 13:30:59.337914945 +0100
> @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
>  #include "internal-fn.h"
>  #include "cgraph.h"
>  #include "tree-ssa.h"
> +#include "gimple-range.h"
>  
>  /* This pass propagates the RHS of assignment statements into use
>     sites of the LHS of the assignment.  It's basically a specialized
> @@ -1837,8 +1838,12 @@ defcodefor_name (tree name, enum tree_co
>     ((T) ((T2) X << Y)) | ((T) ((T2) X >> ((-Y) & (B - 1))))
>     ((T) ((T2) X << (int) Y)) | ((T) ((T2) X >> (int) ((-Y) & (B - 1))))
>  
> -   transform these into (last 2 only if ranger can prove Y < B):
> +   transform these into (last 2 only if ranger can prove Y < B
> +   or Y = N * B):
>     X r<< Y
> +   or
> +   X r<< (& & (B - 1))
> +   The latter for the forms with T2 wider than T if ranger can't prove Y < B.
>  
>     Or for:
>     (X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1)))
> @@ -1868,6 +1873,7 @@ simplify_rotate (gimple_stmt_iterator *g
>    gimple *g;
>    gimple *def_arg_stmt[2] = { NULL, NULL };
>    int wider_prec = 0;
> +  bool add_masking = false;
>  
>    arg[0] = gimple_assign_rhs1 (stmt);
>    arg[1] = gimple_assign_rhs2 (stmt);
> @@ -1995,7 +2001,7 @@ simplify_rotate (gimple_stmt_iterator *g
>        tree cdef_arg1[2], cdef_arg2[2], def_arg2_alt[2];
>        enum tree_code cdef_code[2];
>        gimple *def_arg_alt_stmt[2] = { NULL, NULL };
> -      bool check_range = false;
> +      int check_range = 0;
>        gimple *check_range_stmt = NULL;
>        /* Look through conversion of the shift count argument.
>        The C/C++ FE cast any shift count argument to integer_type_node.
> @@ -2036,6 +2042,11 @@ simplify_rotate (gimple_stmt_iterator *g
>               || cdef_arg2[i] == def_arg2_alt[1 - i])
>             {
>               rotcnt = cdef_arg2[i];
> +             check_range = -1;
> +             if (cdef_arg2[i] == def_arg2[1 - i])
> +               check_range_stmt = def_arg_stmt[1 - i];
> +             else
> +               check_range_stmt = def_arg_alt_stmt[1 - i];
>               break;
>             }
>           defcodefor_name (cdef_arg2[i], &code, &tem, NULL);
> @@ -2048,6 +2059,11 @@ simplify_rotate (gimple_stmt_iterator *g
>                   || tem == def_arg2_alt[1 - i]))
>             {
>               rotcnt = tem;
> +             check_range = -1;
> +             if (tem == def_arg2[1 - i])
> +               check_range_stmt = def_arg_stmt[1 - i];
> +             else
> +               check_range_stmt = def_arg_alt_stmt[1 - i];
>               break;
>             }
>         }
> @@ -2080,7 +2096,7 @@ simplify_rotate (gimple_stmt_iterator *g
>               if (tem == def_arg2[1 - i] || tem == def_arg2_alt[1 - i])
>                 {
>                   rotcnt = tem;
> -                 check_range = true;
> +                 check_range = 1;
>                   if (tem == def_arg2[1 - i])
>                     check_range_stmt = def_arg_stmt[1 - i];
>                   else
> @@ -2099,7 +2115,7 @@ simplify_rotate (gimple_stmt_iterator *g
>                       || tem2 == def_arg2_alt[1 - i])
>                     {
>                       rotcnt = tem2;
> -                     check_range = true;
> +                     check_range = 1;
>                       if (tem2 == def_arg2[1 - i])
>                         check_range_stmt = def_arg_stmt[1 - i];
>                       else
> @@ -2144,17 +2160,44 @@ simplify_rotate (gimple_stmt_iterator *g
>         if (TREE_CODE (rotcnt) != SSA_NAME)
>           return false;
>         int_range_max r;
> -       if (!get_global_range_query ()->range_of_expr (r, rotcnt,
> -                                                      check_range_stmt))
> -         return false;
> +       range_query *q = get_range_query (cfun);
> +       if (q == get_global_range_query ())
> +         q = enable_ranger (cfun);
> +       if (!q->range_of_expr (r, rotcnt, check_range_stmt))
> +         {
> +           if (check_range > 0)
> +             return false;
> +           r.set_varying (TREE_TYPE (rotcnt));
> +         }
>         int prec = TYPE_PRECISION (TREE_TYPE (rotcnt));
>         signop sign = TYPE_SIGN (TREE_TYPE (rotcnt));
>         wide_int min = wide_int::from (TYPE_PRECISION (rtype), prec, sign);
>         wide_int max = wide_int::from (wider_prec - 1, prec, sign);
> -       int_range<2> r2 (TREE_TYPE (rotcnt), min, max);
> +       if (check_range < 0)
> +         max = min;
> +       int_range<1> r2 (TREE_TYPE (rotcnt), min, max);
>         r.intersect (r2);
>         if (!r.undefined_p ())
> -         return false;
> +         {
> +           if (check_range > 0)
> +             {
> +               int_range_max r3;
> +               for (int i = TYPE_PRECISION (rtype) + 1; i < wider_prec;
> +                    i += TYPE_PRECISION (rtype))
> +                 {
> +                   int j = i + TYPE_PRECISION (rtype) - 2;
> +                   min = wide_int::from (i, prec, sign);
> +                   max = wide_int::from (MIN (j, wider_prec - 1),
> +                                         prec, sign);
> +                   int_range<1> r4 (TREE_TYPE (rotcnt), min, max);
> +                   r3.union_ (r4);
> +                 }
> +               r.intersect (r3);
> +               if (!r.undefined_p ())
> +                 return false;
> +             }
> +           add_masking = true;
> +         }
>       }
>        if (rotcnt == NULL_TREE)
>       return false;
> @@ -2169,6 +2212,15 @@ simplify_rotate (gimple_stmt_iterator *g
>        gsi_insert_before (gsi, g, GSI_SAME_STMT);
>        rotcnt = gimple_assign_lhs (g);
>      }
> +  if (add_masking)
> +    {
> +      g = gimple_build_assign (make_ssa_name (TREE_TYPE (rotcnt)),
> +                            BIT_AND_EXPR, rotcnt,
> +                            build_int_cst (TREE_TYPE (rotcnt),
> +                                           TYPE_PRECISION (rtype) - 1));
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      rotcnt = gimple_assign_lhs (g);
> +    }
>    lhs = gimple_assign_lhs (stmt);
>    if (!useless_type_conversion_p (rtype, TREE_TYPE (def_arg1[0])))
>      lhs = make_ssa_name (TREE_TYPE (def_arg1[0]));
> @@ -3958,6 +4010,9 @@ pass_forwprop::execute (function *fun)
>    BITMAP_FREE (to_purge);
>    BITMAP_FREE (need_ab_cleanup);
>  
> +  if (get_range_query (cfun) != get_global_range_query ())
> +    disable_ranger (cfun);
> +
>    if (cfg_changed)
>      todoflags |= TODO_cleanup_cfg;
>  
> --- gcc/testsuite/c-c++-common/rotate-10.c.jj 2023-01-18 13:12:05.917425710 
> +0100
> +++ gcc/testsuite/c-c++-common/rotate-10.c    2023-01-18 13:11:37.869835522 
> +0100
> @@ -0,0 +1,53 @@
> +/* PR tree-optimization/108440 */
> +/* { dg-do compile { target { { ilp32 || lp64 } || llp64 } } } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " r<< " 5 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \\\& 7;" 4 "optimized" } } */
> +
> +unsigned char
> +foo (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +bar (unsigned char x, unsigned int y)
> +{
> +  if (y >= __CHAR_BIT__)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +baz (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__ && y != 2 * __CHAR_BIT__)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +qux (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__ && y != 2 * __CHAR_BIT__ && y != __CHAR_BIT__ + 2)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +quux (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> (__CHAR_BIT__ - y));
> +}
> +
> +unsigned char
> +corge (unsigned char x, unsigned int y)
> +{
> +  if (y >= __CHAR_BIT__)
> +    __builtin_unreachable ();
> +  return (x << y) | (x >> (__CHAR_BIT__ - y));
> +}
> --- gcc/testsuite/c-c++-common/rotate-11.c.jj 2023-01-18 13:14:07.146654356 
> +0100
> +++ gcc/testsuite/c-c++-common/rotate-11.c    2023-01-18 13:14:19.916467769 
> +0100
> @@ -0,0 +1,53 @@
> +/* PR tree-optimization/108440 */
> +/* { dg-do compile { target { { ilp32 || lp64 } || llp64 } } } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " r<< " 5 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \\\& 7;" 4 "optimized" } } */
> +
> +unsigned char
> +foo (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__)
> +    return 42;
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +bar (unsigned char x, unsigned int y)
> +{
> +  if (y >= __CHAR_BIT__)
> +    return 42;
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +baz (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__ && y != 2 * __CHAR_BIT__)
> +    return 42;
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +qux (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__ && y != 2 * __CHAR_BIT__ && y != __CHAR_BIT__ + 2)
> +    return 42;
> +  return (x << y) | (x >> ((-y) & (__CHAR_BIT__ - 1)));
> +}
> +
> +unsigned char
> +quux (unsigned char x, unsigned int y)
> +{
> +  if (y > __CHAR_BIT__)
> +    return 42;
> +  return (x << y) | (x >> (__CHAR_BIT__ - y));
> +}
> +
> +unsigned char
> +corge (unsigned char x, unsigned int y)
> +{
> +  if (y >= __CHAR_BIT__)
> +    return 42;
> +  return (x << y) | (x >> (__CHAR_BIT__ - y));
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

Reply via email to