RE: [PATCH]middle-end convert negate + right shift into compare greater.

Richard Biener via Gcc-patches Thu, 04 Nov 2021 06:06:21 -0700

On Wed, 3 Nov 2021, Tamar Christina wrote:

> Hi,
> 
> I have addressed all the feedback and updated patch attached:
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-pc-linux-gnu and no regressions.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * match.pd: New negate+shift pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/signbit-2.c: New test.
>       * gcc.dg/signbit-3.c: New test.
>       * gcc.dg/signbit-4.c: New test.
>       * gcc.dg/signbit-5.c: New test.
>       * gcc.dg/signbit-6.c: New test.
>       * gcc.target/aarch64/signbit-1.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> 65a6591f75c03333602147bbdf6d59f9ccd4b1e5..fe93500d22e2388889c8c9faf4c58cee95dec7f9
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
>     uniform_integer_cst_p
>     HONOR_NANS
>     uniform_vector_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   expand_vec_cmp_expr_p)
>  
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -832,6 +833,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>      { tree utype = unsigned_type_for (type); }
>      (convert (rshift (lshift (convert:utype @0) @2) @3))))))
>  
> +/* Fold (-x >> C) into -(x > 0) where C = precision(type) - 1.  */
> +(for cst (INTEGER_CST VECTOR_CST)
> + (simplify
> +  (rshift (negate:s @0) cst@1)
> +   (if (!TYPE_UNSIGNED (type)
> +        && TYPE_OVERFLOW_UNDEFINED (type))
> +    (with { tree stype = TREE_TYPE (@1);
> +         tree bt = truth_type_for (type);
> +         tree zeros = build_zero_cst (type); }
> +     (switch
> +      /* Handle scalar case.  */
> +      (if (INTEGRAL_TYPE_P (type)
> +        /* If we apply the rule to the scalar type before vectorization
> +           we will enforce the result of the comparison being a bool
> +           which will require an extra AND on the result that will be
> +           indistinguishable from when the user did actually want 0
> +           or 1 as the result so it can't be removed.  */
> +        && canonicalize_math_after_vectorization_p ()
> +        && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (type) - 1))
> +       (negate (convert (gt @0 { zeros; }))))
> +      /* Handle vector case.  */
> +      (if (VECTOR_INTEGER_TYPE_P (type)
> +        /* First check whether the target has the same mode for vector
> +           comparison results as its operands do.  */
> +        && TYPE_MODE (bt) == TYPE_MODE (type)
> +        /* Then check to see if the target is able to expand the comparison
> +           with the given type later on, otherwise we may ICE.  */
> +        && expand_vec_cmp_expr_p (type, bt, { GT_EXPR }))


No need to wrap GT_EXPR in { }

> +       (with { tree cst = uniform_integer_cst_p (@1); }

If you declare 'cst' above, where you declare 'bt', you can write

          && (cst = uniform_integer_cst_p (@1)))

which combines it with the 'if' above and the one below, simplifying the
indentation and flow.

OK with that change.

I guess it might happen that the scalar transform expands badly
on some targets?  Please keep an eye on any problems that come up.

Thanks,
Richard.

> +     (if (cst && wi::eq_p (wi::to_wide (cst), element_precision (type) - 1))
> +      (view_convert (gt:bt @0 { zeros; }))))))))))
> +
>  /* Fold (C1/X)*C2 into (C1*C2)/X.  */
>  (simplify
>   (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
> diff --git a/gcc/testsuite/gcc.dg/signbit-2.c 
> b/gcc/testsuite/gcc.dg/signbit-2.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc30176c96a669bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } 
> } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-3.c 
> b/gcc/testsuite/gcc.dg/signbit-3.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..19e9c06c349b3287610f817628f00938ece60bf7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
> +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
> diff --git a/gcc/testsuite/gcc.dg/signbit-4.c 
> b/gcc/testsuite/gcc.dg/signbit-4.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..bc459ba60a760bdf49e94dbec762f378c24fe9b5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-4.c
> @@ -0,0 +1,65 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fwrapv" } */
> +
> +#include <stdint.h>
> +#include <limits.h>
> +#include <stdio.h>
> +
> +#ifndef N
> +#define N 65
> +#endif
> +
> +#ifndef TYPE
> +#define TYPE int32_t
> +#endif
> +
> +#ifndef DEBUG
> +#define DEBUG 1
> +#endif
> +
> +#define BASE ((TYPE) -1 < 0 ? -126 : 4)
> +
> +__attribute__ ((noinline, noipa))
> +void fun1(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +__attribute__ ((noinline, noipa, optimize("O0")))
> +void fun2(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +int main ()
> +{
> +  TYPE a[N];
> +  TYPE b[N];
> +
> +  a[0] = INT_MIN;
> +  b[0] = INT_MIN;
> +
> +  for (int i = 1; i < N; ++i)
> +    {
> +      a[i] = BASE + i * 13;
> +      b[i] = BASE + i * 13;
> +      if (DEBUG)
> +        printf ("%d: 0x%x\n", i, a[i]);
> +    }
> +
> +  fun1 (a, N);
> +  fun2 (b, N);
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      if (DEBUG)
> +        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
> +
> +      if (a[i] != b[i])
> +        __builtin_abort ();
> +    }
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/signbit-5.c 
> b/gcc/testsuite/gcc.dg/signbit-5.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..22a92704773e3282759524b74d35196a477d43dd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-5.c
> @@ -0,0 +1,65 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3" } */
> +
> +#include <stdint.h>
> +#include <limits.h>
> +#include <stdio.h>
> +
> +#ifndef N
> +#define N 65
> +#endif
> +
> +#ifndef TYPE
> +#define TYPE int32_t
> +#endif
> +
> +#ifndef DEBUG
> +#define DEBUG 1
> +#endif
> +
> +#define BASE ((TYPE) -1 < 0 ? -126 : 4)
> +
> +__attribute__ ((noinline, noipa))
> +void fun1(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +__attribute__ ((noinline, noipa, optimize("O1")))
> +void fun2(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +int main ()
> +{
> +  TYPE a[N];
> +  TYPE b[N];
> +
> +  a[0] = INT_MIN;
> +  b[0] = INT_MIN;
> +
> +  for (int i = 1; i < N; ++i)
> +    {
> +      a[i] = BASE + i * 13;
> +      b[i] = BASE + i * 13;
> +      if (DEBUG)
> +        printf ("%d: 0x%x\n", i, a[i]);
> +    }
> +
> +  fun1 (a, N);
> +  fun2 (b, N);
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      if (DEBUG)
> +        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
> +
> +      if (a[i] != b[i])
> +        __builtin_abort ();
> +    }
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/signbit-6.c 
> b/gcc/testsuite/gcc.dg/signbit-6.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..da186624cfa057dfc3780c8af4f6b1335ba07e7e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/signbit-6.c
> @@ -0,0 +1,72 @@
> +/* { dg-do run } */
> +/* { dg-options "-O1" } */
> +
> +#include <stdint.h>
> +#include <limits.h>
> +#include <stdio.h>
> +
> +#ifndef N
> +#define N 65
> +#endif
> +
> +#ifndef TYPE
> +#define TYPE int32_t
> +#endif
> +
> +#ifndef DEBUG
> +#define DEBUG 1
> +#endif
> +
> +#define BASE ((TYPE) -1 < 0 ? -126 : 4)
> +
> +__attribute__ ((noinline, noipa))
> +void fun1(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +__attribute__ ((noinline, noipa, optimize("O0")))
> +void fun2(TYPE *x, int n)
> +{
> +    for (int i = 0; i < n; i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +int main ()
> +{
> +  TYPE a[N];
> +  TYPE b[N];
> +
> +  a[0] = INT_MIN;
> +  b[0] = INT_MIN;
> +
> +  for (int i = 1; i < N; ++i)
> +    {
> +      a[i] = BASE + i * 13;
> +      b[i] = BASE + i * 13;
> +      if (DEBUG)
> +        printf ("%d: 0x%x\n", i, a[i]);
> +    }
> +
> +  fun1 (a, N);
> +  fun2 (b, N);
> +
> +  if (DEBUG)
> +    printf ("%d = 0x%x == 0x%x\n", 0, a[0], b[0]);
> +
> +  if (a[0] != 0x0 || b[0] != -1)
> +        __builtin_abort ();
> +
> +
> +  for (int i = 1; i < N; ++i)
> +    {
> +      if (DEBUG)
> +        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
> +
> +      if (a[i] != b[i])
> +        __builtin_abort ();
> +    }
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c 
> b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +#include <stdint.h>
> +
> +void fun1(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 31;
> +}
> +
> +void fun2(int32_t *x, int n)
> +{
> +    for (int i = 0; i < (n & -16); i++)
> +      x[i] = (-x[i]) >> 30;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)

RE: [PATCH]middle-end convert negate + right shift into compare greater.

Reply via email to