On September 10, 2019 9:45:28 AM GMT+02:00, Jakub Jelinek <ja...@redhat.com> 
wrote:
>Hi!
>
>The following patch optimizes (A / (cast) (1 << B)) -> (A >> B)
>in addition to already supported (A / (1 << B)) -> (A >> B).
>
>The patch only supports same precision cast (i.e. a sign change),
>or widening cast, in that case either zero extension (then the extended
>value is known to be a power of two and all is fine), or sign extension
>if the first operand has all upper bits starting with the sign bit of
>the
>narrower type clear.  That is because (unsigned long long) (1 << 31)
>is 0xffffffff80000000ULL and we need to make sure that dividing by that
>huge value is equal to >> 31.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Richard.

>2019-09-10  Jakub Jelinek  <ja...@redhat.com>
>
>       PR middle-end/91680
>       * match.pd ((A / (1 << B)) -> (A >> B)): Allow widening cast from
>       the shift type to type.
>
>       * gcc.dg/tree-ssa/pr91680.c: New test.
>       * g++.dg/torture/pr91680.C: New test.
>
>--- gcc/match.pd.jj    2019-09-09 17:33:08.551582878 +0200
>+++ gcc/match.pd       2019-09-09 20:05:25.966107441 +0200
>@@ -305,13 +305,29 @@ (define_operator_list COND_TERNARY
> /* (A / (1 << B)) -> (A >> B).
>   Only for unsigned A.  For signed A, this would not preserve rounding
>    toward zero.
>-   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
>+   For example: (-1 / ( 1 << B)) !=  -1 >> B.
>+   Also also widening conversions, like:
>+   (A / (unsigned long long) (1U << B)) -> (A >> B)
>+   or
>+   (A / (unsigned long long) (1 << B)) -> (A >> B).
>+   If the left shift is signed, it can be done only if the upper bits
>+   of A starting from shift's type sign bit are zero, as
>+   (unsigned long long) (1 << 31) is -2147483648ULL, not
>2147483648ULL,
>+   so it is valid only if A >> 31 is zero.  */
> (simplify
>- (trunc_div @0 (lshift integer_onep@1 @2))
>+ (trunc_div @0 (convert? (lshift integer_onep@1 @2)))
>  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
>       && (!VECTOR_TYPE_P (type)
>         || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
>-        || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
>+        || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
>+      && (useless_type_conversion_p (type, TREE_TYPE (@1))
>+        || (element_precision (type) >= element_precision (TREE_TYPE (@1))
>+            && (TYPE_UNSIGNED (TREE_TYPE (@1))
>+                || (element_precision (type)
>+                    == element_precision (TREE_TYPE (@1)))
>+                || (get_nonzero_bits (@0)
>+                    & wi::mask (element_precision (TREE_TYPE (@1)) - 1, true,
>+                                element_precision (type))) == 0))))
>   (rshift @0 @2)))
> 
> /* Preserve explicit divisions by 0: the C++ front-end wants to detect
>--- gcc/testsuite/gcc.dg/tree-ssa/pr91680.c.jj 2019-09-09
>20:18:04.349619827 +0200
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr91680.c    2019-09-09
>20:18:33.922171856 +0200
>@@ -0,0 +1,37 @@
>+/* PR middle-end/91680 */
>+/* { dg-do compile { target { ilp32 || lp64 } } } */
>+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
>+/* { dg-final { scan-tree-dump-times " / " 1 "forwprop1" } } */
>+/* { dg-final { scan-tree-dump-times " >> " 3 "forwprop1" } } */
>+
>+__attribute__((noipa)) unsigned long long
>+foo (unsigned char x)
>+{
>+  unsigned long long q = 1 << x;
>+  return 256 / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+bar (unsigned char x)
>+{
>+  unsigned long long q = 1U << x;
>+  return 256 / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+baz (unsigned char x, unsigned long long y)
>+{
>+  /* This can't be optimized, at least not in C++ and maybe not
>+     in C89, because for x 31 q is -2147483648ULL, not
>+     2147483648ULL, and e.g. 2147483648ULL >> 31 is 1, while
>+     2147483648ULL / -2147483648ULL is 0.  */
>+  unsigned long long q = 1 << x;
>+  return y / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+qux (unsigned char x, unsigned long long y)
>+{
>+  unsigned long long q = 1U << x;
>+  return y / q;
>+}
>--- gcc/testsuite/g++.dg/torture/pr91680.C.jj  2019-09-09
>20:25:51.775539166 +0200
>+++ gcc/testsuite/g++.dg/torture/pr91680.C     2019-09-09
>20:26:18.610132667 +0200
>@@ -0,0 +1,35 @@
>+/* PR middle-end/91680 */
>+/* { dg-do run { target { ilp32 || lp64 } } } */
>+
>+extern "C" void abort ();
>+
>+#include "../../gcc.dg/tree-ssa/pr91680.c"
>+
>+int
>+main ()
>+{
>+  unsigned char i;
>+  for (i = 0; i < __SIZEOF_INT__ * __CHAR_BIT__; i++)
>+    {
>+      volatile unsigned long long q = 1 << i;
>+      if (foo (i) != 256 / q)
>+      abort ();
>+      q = 1U << i;
>+      if (bar (i) != 256 / q)
>+      abort ();
>+      q = 1 << i;
>+      if (baz (i, (1U << i) - 1) != ((1U << i) - 1) / q)
>+      abort ();
>+      if (baz (i, 1U << i) != (1U << i) / q)
>+      abort ();
>+      if (baz (i, -1) != -1 / q)
>+      abort ();
>+      q = 1U << i;
>+      if (qux (i, (1U << i) - 1) != ((1U << i) - 1) / q)
>+      abort ();
>+      if (qux (i, 1U << i) != (1U << i) / q)
>+      abort ();
>+      if (qux (i, -1) != -1 / q)
>+      abort ();
>+    }
>+}
>
>       Jakub

Reply via email to