Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-22 Thread James Greenhalgh

On Wed, Jun 21, 2017 at 08:59:29AM +0200, Richard Biener wrote:
> Actually I was looking for a bit more generic
>
> bool
> target_supports_op_p (tree type, enum tree_code code,
> enum optab_subtype = optab_default)
> {
>   optab ot = optab_for_tree_code (code, type, optab_subtype);
>   return (ot != unknown_optab
>   && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
> }
>
> and you using target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)
> || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
>
> Ok with that change.

Oh, I see what you mean. Sorry for not getting that correct last time round.

This is what I committed to trunk as revision 249502 after bootstrapping and
testing on aarch64-none-linux-gnu without any issues.

Thanks,
James

gcc/

2017-06-22  James Greenhalgh  

* match.pd (A / (1 << B) -> A >> B): New.
* generic-match-head.c: Include optabs-tree.h.
* gimple-match-head.c: Likewise.
* optabs-tree.h (target_supports_op_p): New.
* optabs-tree.c (target_supports_op_p): New.

gcc/testsuite/

2017-06-22  James Greenhalgh  

* gcc.dg/tree-ssa/forwprop-37.c: New.

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..862372a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (op @0 integer_onep)
 (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
+  && (!VECTOR_TYPE_P (type)
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
traps enables better optimizations than these anyway.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 4bb54ba..c183b14 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -376,3 +376,18 @@ init_tree_optimization_optabs (tree optnode)
   ggc_free (tmp_optabs);
 }
 }
+
+/* Return TRUE if the target has support for vector right shift of an
+   operand of type TYPE.  If OT_TYPE is OPTAB_DEFAULT, check for existence
+   of a shift by either a scalar or a vector.  Otherwise, check only
+   for a shift that matches OT_TYPE.  */
+
+bool
+target_supports_op_p (tree type, enum tree_code code,
+		  enum optab_subtype ot_subtype)
+{
+  optab ot = optab_for_tree_code (code, type, ot_subtype);
+  return (ot != unknown_optab
+	  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
+}
+
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
index d0b27e0..52e842b 100644
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -41,5 +41,7 @@ bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
 bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
 bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
 void init_tree_optimization_optabs (tree);
+bool target_supports_op_p (tree, enum tree_code,
+			   enum optab_subtype = optab_default);
 
 #endif
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */


Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-21 Thread Richard Biener
On Tue, 20 Jun 2017, James Greenhalgh wrote:

> 
> On Fri, Jun 16, 2017 at 11:41:57AM +0200, Richard Biener wrote:
> > On Fri, 16 Jun 2017, James Greenhalgh wrote:
> > > On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > > > +   We can't do the same for signed A, as it might be negative, which
> > > > would
> > > > +   introduce undefined behaviour.  */
> > > >
> > > > huh, AFAIR it is _left_ shift of negative values that invokes
> > > > undefined behavior.
> > >
> > > You're right this is not a clear comment. The problem is not undefined
> > > behaviour, so that text needs to go, but rounding towards/away from zero
> > > for signed negative values. Division will round towards zero, arithmetic
> > > right shift away from zero. For example in:
> > >
> > > -1 / (1 << 1)   !=-1 >> 1
> > >   = -1 / 2
> > >   = 0 = -1
> > >
> > > I've rewritten the comment to make it clear this is why we can only make
> > > this optimisation for unsigned values.
> >
> > Ah, of course.  You could use
> >
> >  if ((TYPE_UNSIGNED (type)
> >   || tree_expr_nonnegative_p (@0))
> >
> > here as improvement.
> 
> Thanks, I've made that change.
> 
> > > See, for example, gcc.c-torture/execute/pr34070-2.c
> > >
> > > > Note that as you are accepting vectors you need to make sure the
> > > > target actually supports arithmetic right shift of vectors
> > > > (you only know it supports left shift and division -- so it might
> > > > be sort-of-superfluous to check in case there is no arch that supports
> > > > those but not the other).
> > >
> > > I've added a check for that using optabs, is that the right way to do 
> > > this?
> >
> > +  && (!VECTOR_TYPE_P (type)
> > +  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
> > +  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
> >
> > is not enough -- you need sth like
> >
> >  optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
> >  if (ot != unknown_optab
> >  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
> >.. ok! ...
> >
> > ideally we'd have a helper for this in optab-tree.[ch],
> > tree-vect-patterns.c could also make use of that.
> 
> OK. I've added "target_has_vector_rshift_p" for this purpose.

Actually I was looking for a bit more generic

bool
target_supports_op_p (tree type, enum tree_code code,
  enum optab_subtype = optab_default)
{
  optab ot = optab_for_tree_code (code, type, optab_subtype);
  return (ot != unknown_optab
  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
}

and you using target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)
|| target_supports_op_p (type, RSHIFT_EXPR, optab_vector)

Ok with that change.

Thanks,
Richard.

> Bootstrapped and tested on aarch64-none-linux-gnu with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-19  James Greenhalgh  
> 
>   * match.pd (A / (1 << B) -> A >> B): New.
>   * generic-match-head.c: Include optabs-tree.h.
>   * gimple-match-head.c: Likewise.
>   * optabs-tree.h (target_has_vector_rshift_p): New.
>   * optabs-tree.c (target_has_vector_rshift_p): New.
> 
> gcc/testsuite/
> 
> 2017-06-19  James Greenhalgh  
> 
>   * gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-20 Thread James Greenhalgh

On Fri, Jun 16, 2017 at 11:41:57AM +0200, Richard Biener wrote:
> On Fri, 16 Jun 2017, James Greenhalgh wrote:
> > On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > > +   We can't do the same for signed A, as it might be negative, which
> > > would
> > > +   introduce undefined behaviour.  */
> > >
> > > huh, AFAIR it is _left_ shift of negative values that invokes
> > > undefined behavior.
> >
> > You're right this is not a clear comment. The problem is not undefined
> > behaviour, so that text needs to go, but rounding towards/away from zero
> > for signed negative values. Division will round towards zero, arithmetic
> > right shift away from zero. For example in:
> >
> > -1 / (1 << 1)   !=-1 >> 1
> >   = -1 / 2
> >   = 0 = -1
> >
> > I've rewritten the comment to make it clear this is why we can only make
> > this optimisation for unsigned values.
>
> Ah, of course.  You could use
>
>  if ((TYPE_UNSIGNED (type)
>   || tree_expr_nonnegative_p (@0))
>
> here as improvement.

Thanks, I've made that change.

> > See, for example, gcc.c-torture/execute/pr34070-2.c
> >
> > > Note that as you are accepting vectors you need to make sure the
> > > target actually supports arithmetic right shift of vectors
> > > (you only know it supports left shift and division -- so it might
> > > be sort-of-superfluous to check in case there is no arch that supports
> > > those but not the other).
> >
> > I've added a check for that using optabs, is that the right way to do this?
>
> +  && (!VECTOR_TYPE_P (type)
> +  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
> +  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
>
> is not enough -- you need sth like
>
>  optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
>  if (ot != unknown_optab
>  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
>.. ok! ...
>
> ideally we'd have a helper for this in optab-tree.[ch],
> tree-vect-patterns.c could also make use of that.

OK. I've added "target_has_vector_rshift_p" for this purpose.

Bootstrapped and tested on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
gcc/

2017-06-19  James Greenhalgh  

* match.pd (A / (1 << B) -> A >> B): New.
* generic-match-head.c: Include optabs-tree.h.
* gimple-match-head.c: Likewise.
* optabs-tree.h (target_has_vector_rshift_p): New.
* optabs-tree.c (target_has_vector_rshift_p): New.

gcc/testsuite/

2017-06-19  James Greenhalgh  

* gcc.dg/tree-ssa/forwprop-37.c: New.

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..eb6bd59 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (op @0 integer_onep)
 (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
+  && (!VECTOR_TYPE_P (type)
+	  || target_has_vector_rshift_p (type, optab_default)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
traps enables better optimizations than these anyway.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 4bb54ba..4a513d2 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -376,3 +376,24 @@ init_tree_optimization_optabs (tree optnode)
   ggc_free (tmp_optabs);
 }
 }
+
+/* Return TRUE if the target has support for vector right shift of an
+   operand of type TYPE.  If OT_TYPE is OPTAB_DEFAULT, check for existence
+   of a shift by either a scalar or a vector.  Otherwise, check only
+   for a shift that matches OT_TYPE.  */
+
+bool
+target_has_vector_rshift_p (tree type, enum optab_subtype ot_type)
+{
+  gcc_assert (VECTOR_TYPE_P (type));
+  if (ot_type != optab_default)
+{
+  optab ot = optab_for_tree_code (RSHIFT_EXPR, type, 

Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-16 Thread Richard Biener
On Fri, 16 Jun 2017, James Greenhalgh wrote:

> 
> On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > On Mon, 12 Jun 2017, James Greenhalgh wrote:
> >
> > >
> > > Hi,
> > >
> > > As subject, for the testcase in the patch:
> > >
> > >   unsigned long
> > >   f2 (unsigned long a, int b)
> > >   {
> > > unsigned long x = 1UL << b;
> > > return a / x;
> > >   }
> > >
> > > We currently generate:
> > >
> > >   f2:
> > >   mov x2, 1
> > >   lsl x1, x2, x1
> > >   udivx0, x0, x1
> > >   ret
> > >
> > > Which could instead be transformed to:
> > >
> > >   f2:
> > >   lsr x0, x0, x1
> > >   ret
> > >
> > > OK?
> >
> > +   We can't do the same for signed A, as it might be negative, which
> > would
> > +   introduce undefined behaviour.  */
> >
> > huh, AFAIR it is _left_ shift of negative values that invokes
> > undefined behavior.
> 
> You're right this is not a clear comment. The problem is not undefined
> behaviour, so that text needs to go, but rounding towards/away from zero
> for signed negative values. Division will round towards zero, arithmetic
> right shift away from zero. For example in:
> 
> -1 / (1 << 1)   !=-1 >> 1
>   = -1 / 2
>   = 0 = -1
> 
> I've rewritten the comment to make it clear this is why we can only make
> this optimisation for unsigned values.

Ah, of course.  You could use

 if ((TYPE_UNSIGNED (type)
  || tree_expr_nonnegative_p (@0))

here as improvement.

> See, for example, gcc.c-torture/execute/pr34070-2.c
> 
> > Note that as you are accepting vectors you need to make sure the
> > target actually supports arithmetic right shift of vectors
> > (you only know it supports left shift and division -- so it might
> > be sort-of-superfluous to check in case there is no arch that supports
> > those but not the other).
> 
> I've added a check for that using optabs, is that the right way to do this?

+  && (!VECTOR_TYPE_P (type)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))

is not enough -- you need sth like

 optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
 if (ot != unknown_optab
 && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
   .. ok! ...

ideally we'd have a helper for this in optab-tree.[ch], 
tree-vect-patterns.c could also make use of that.

Thanks,
Richard.


> Bootstrapped and tested on aarch64-none-linux-gnu with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-13  James Greenhalgh  
> 
>   * match.pd (A / (1 << B) -> A >> B): New.
>   * generic-match-head.c: Include optabs-tree.h.
>   * gimple-match-head.c: Likewise.
> 
> gcc/testsuite/
> 
> 2017-06-13  James Greenhalgh  
> 
>   * gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-16 Thread James Greenhalgh

On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> On Mon, 12 Jun 2017, James Greenhalgh wrote:
>
> >
> > Hi,
> >
> > As subject, for the testcase in the patch:
> >
> >   unsigned long
> >   f2 (unsigned long a, int b)
> >   {
> > unsigned long x = 1UL << b;
> > return a / x;
> >   }
> >
> > We currently generate:
> >
> >   f2:
> > mov x2, 1
> > lsl x1, x2, x1
> > udivx0, x0, x1
> > ret
> >
> > Which could instead be transformed to:
> >
> >   f2:
> > lsr x0, x0, x1
> > ret
> >
> > OK?
>
> +   We can't do the same for signed A, as it might be negative, which
> would
> +   introduce undefined behaviour.  */
>
> huh, AFAIR it is _left_ shift of negative values that invokes
> undefined behavior.

You're right this is not a clear comment. The problem is not undefined
behaviour, so that text needs to go, but rounding towards/away from zero
for signed negative values. Division will round towards zero, arithmetic
right shift away from zero. For example in:

-1 / (1 << 1)   !=-1 >> 1
  = -1 / 2
  = 0 = -1

I've rewritten the comment to make it clear this is why we can only make
this optimisation for unsigned values.

See, for example, gcc.c-torture/execute/pr34070-2.c

> Note that as you are accepting vectors you need to make sure the
> target actually supports arithmetic right shift of vectors
> (you only know it supports left shift and division -- so it might
> be sort-of-superfluous to check in case there is no arch that supports
> those but not the other).

I've added a check for that using optabs, is that the right way to do this?

Bootstrapped and tested on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
gcc/

2017-06-13  James Greenhalgh  

* match.pd (A / (1 << B) -> A >> B): New.
* generic-match-head.c: Include optabs-tree.h.
* gimple-match-head.c: Likewise.

gcc/testsuite/

2017-06-13  James Greenhalgh  

* gcc.dg/tree-ssa/forwprop-37.c: New.

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..2bea268 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (op @0 integer_onep)
 (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if (TYPE_UNSIGNED (type)
+  && (!VECTOR_TYPE_P (type)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
traps enables better optimizations than these anyway.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */


Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-12 Thread Richard Biener
On Mon, 12 Jun 2017, James Greenhalgh wrote:

> 
> Hi,
> 
> As subject, for the testcase in the patch:
> 
>   unsigned long
>   f2 (unsigned long a, int b)
>   {
> unsigned long x = 1UL << b;
> return a / x;
>   }
> 
> We currently generate:
> 
>   f2:
>   mov x2, 1
>   lsl x1, x2, x1
>   udivx0, x0, x1
>   ret
> 
> Which could instead be transformed to:
> 
>   f2:
>   lsr x0, x0, x1
>   ret
> 
> OK?

+   We can't do the same for signed A, as it might be negative, which 
would
+   introduce undefined behaviour.  */

huh, AFAIR it is _left_ shift of negative values that invokes
undefined behavior.

Note that as you are accepting vectors you need to make sure the
target actually supports arithmetic right shift of vectors
(you only know it supports left shift and division -- so it might
be sort-of-superfluous to check in case there is no arch that supports
those but not the other).

Richard.

> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-12  James Greenhalgh  
> 
>   * match.pd (A / (1 << B) -> A >> B): New.
> 
> gcc/testsuite/
> 
> 2017-06-12  James Greenhalgh  
> 
>   * gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-12 Thread James Greenhalgh

Hi,

As subject, for the testcase in the patch:

  unsigned long
  f2 (unsigned long a, int b)
  {
unsigned long x = 1UL << b;
return a / x;
  }

We currently generate:

  f2:
mov x2, 1
lsl x1, x2, x1
udivx0, x0, x1
ret

Which could instead be transformed to:

  f2:
lsr x0, x0, x1
ret

OK?

Thanks,
James

---
gcc/

2017-06-12  James Greenhalgh  

* match.pd (A / (1 << B) -> A >> B): New.

gcc/testsuite/

2017-06-12  James Greenhalgh  

* gcc.dg/tree-ssa/forwprop-37.c: New.

diff --git a/gcc/match.pd b/gcc/match.pd
index 54a8e04..656ede2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (op @0 integer_onep)
 (non_lvalue @0)))
 
+/* For unsigned A: (A / (1 << B)), (A >> B).
+   We can't do the same for signed A, as it might be negative, which would
+   introduce undefined behaviour.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if (TYPE_UNSIGNED (type))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
traps enables better optimizations than these anyway.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */