Re: [PATCH 5/8 v2] middle-end: Add cltz_complement idiom recognition

2023-01-19 Thread Richard Biener via Gcc-patches
On Thu, Jan 19, 2023 at 10:19 AM Jan-Benedict Glaw  wrote:
>
> On Thu, 2022-12-22 17:42:16 +, Andrew Carlotti via Gcc-patches 
>  wrote:
> > New patch below, bootstrapped and regression tested on
> > aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu - ok to merge?
>
> > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> > index 
> > fece876099c1687569d6351e7d2416ea6acae5b5..ce2441f2a6dbdf2d8fe42755d5d1abd8a631bb5c
> >  100644
> > --- a/gcc/tree-ssa-loop-niter.cc
> > +++ b/gcc/tree-ssa-loop-niter.cc
> > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "tree-chrec.h"
> >  #include "tree-scalar-evolution.h"
> >  #include "tree-dfa.h"
> > +#include "internal-fn.h"
> >  #include "gimple-range.h"
> >
> >
> > @@ -2198,6 +2199,224 @@ number_of_iterations_popcount (loop_p loop, edge 
> > exit,
> >return true;
> >  }
> >
> > +/* Return an expression that counts the leading/trailing zeroes of src.
> > +
> > +   If define_at_zero is true, then the built expression will be defined to
> > +   return the precision of src when src == 0 (using either a conditional
> > +   expression or a suitable internal function).
> > +   Otherwise, we can elide the conditional expression and let src = 0 
> > invoke
> > +   undefined behaviour.  */
> > +
> > +static tree
> > +build_cltz_expr (tree src, bool leading, bool define_at_zero)
> > +{
> [...]
> > +
> > +  tree call;
> > +  if (use_ifn)
> > +{
> > +  call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
> > +integer_type_node, 1, src);
> > +  int val;
> > +  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
>  
>
> This will give us a new unused variable warning.

I wonder if hardening the defaults.h macros like

#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  (((MODE), (VALUE)), 0)

fixes that and makes sense (also to avoid losing side-effects for the arguments)

Richard.


> > +  int optab_defined_at_zero
> > + = leading ? CLZ_DEFINED_VALUE_AT_ZERO (mode, val)
> > +   : CTZ_DEFINED_VALUE_AT_ZERO (mode, val);
> > +  if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
> > + {
> > +   tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
> > +   build_zero_cst (TREE_TYPE (src)));
> > +   call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
> > +  build_int_cst (integer_type_node, prec));
> > + }
> > +}
>
> MfG, JBG
>
> --


Re: [PATCH 5/8 v2] middle-end: Add cltz_complement idiom recognition

2023-01-19 Thread Jan-Benedict Glaw
On Thu, 2022-12-22 17:42:16 +, Andrew Carlotti via Gcc-patches 
 wrote:
> New patch below, bootstrapped and regression tested on
> aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu - ok to merge?

> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index 
> fece876099c1687569d6351e7d2416ea6acae5b5..ce2441f2a6dbdf2d8fe42755d5d1abd8a631bb5c
>  100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-chrec.h"
>  #include "tree-scalar-evolution.h"
>  #include "tree-dfa.h"
> +#include "internal-fn.h"
>  #include "gimple-range.h"
>  
>  
> @@ -2198,6 +2199,224 @@ number_of_iterations_popcount (loop_p loop, edge exit,
>return true;
>  }
>  
> +/* Return an expression that counts the leading/trailing zeroes of src.
> +
> +   If define_at_zero is true, then the built expression will be defined to
> +   return the precision of src when src == 0 (using either a conditional
> +   expression or a suitable internal function).
> +   Otherwise, we can elide the conditional expression and let src = 0 invoke
> +   undefined behaviour.  */
> +
> +static tree
> +build_cltz_expr (tree src, bool leading, bool define_at_zero)
> +{
[...]
> +
> +  tree call;
> +  if (use_ifn)
> +{
> +  call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
> +integer_type_node, 1, src);
> +  int val;
> +  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
 

This will give us a new unused variable warning.

> +  int optab_defined_at_zero
> + = leading ? CLZ_DEFINED_VALUE_AT_ZERO (mode, val)
> +   : CTZ_DEFINED_VALUE_AT_ZERO (mode, val);
> +  if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
> + {
> +   tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
> +   build_zero_cst (TREE_TYPE (src)));
> +   call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
> +  build_int_cst (integer_type_node, prec));
> + }
> +}

MfG, JBG

-- 


signature.asc
Description: PGP signature


Re: [PATCH 5/8 v2] middle-end: Add cltz_complement idiom recognition

2023-01-12 Thread Richard Biener via Gcc-patches
On Thu, Dec 22, 2022 at 6:42 PM Andrew Carlotti  wrote:
>
> On Thu, Nov 24, 2022 at 11:41:31AM +0100, Richard Biener wrote:
> > Note we do have CTZ and CLZ
> > optabs and internal functions - in case there's a HImode CLZ this
> > could be elided.  More general you can avoid using the __builtin_
> > functions with their fixed types in favor of using IFN_C[TL]Z which
> > are type agnostic (but require optab support - you should be able
> > to check this via direct_internal_fn_supported_p).
>
> IFN support added. I've also renamed the defined_at_zero parameter to
> define_at_zero, since this is a request for the expression to define it,
> rather than a guarantee that it is already defined.
>
> New patch below, bootstrapped and regression tested on
> aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu - ok to merge?

OK, and sorry for the delay.

Richard.

> ---
>
> This recognises patterns of the form:
> while (n) { n >>= 1 }
>
> This patch results in improved (but still suboptimal) codegen:
>
> foo (unsigned int b) {
> int c = 0;
>
> while (b) {
> b >>= 1;
> c++;
> }
>
> return c;
> }
>
> foo:
> .LFB11:
> .cfi_startproc
> cbz w0, .L3
> clz w1, w0
> tst x0, 1
> mov w0, 32
> sub w0, w0, w1
> cselw0, w0, wzr, ne
> ret
>
> The conditional is unnecessary. phiopt could recognise a redundant csel
> (using cond_removal_in_builtin_zero_pattern) when one of the inputs is a
> clz call, but it cannot recognise the redunancy when the input is (e.g.)
> (32 - clz).
>
> I could perhaps extend this function to recognise this pattern in a later
> patch, if this is a good place to recognise more patterns.
>
> gcc/ChangeLog:
>
> PR tree-optimization/94793
> * tree-scalar-evolution.cc (expression_expensive_p): Add checks
> for c[lt]z optabs.
> * tree-ssa-loop-niter.cc (build_cltz_expr): New.
> (number_of_iterations_cltz_complement): New.
> (number_of_iterations_bitcount): Add call to the above.
>
> gcc/testsuite/ChangeLog:
>
> * lib/target-supports.exp (check_effective_target_clz)
> (check_effective_target_clzl, check_effective_target_clzll)
> (check_effective_target_ctz, check_effective_target_clzl)
> (check_effective_target_ctzll): New.
> * gcc.dg/tree-ssa/cltz-complement-max.c: New test.
> * gcc.dg/tree-ssa/clz-complement-char.c: New test.
> * gcc.dg/tree-ssa/clz-complement-int.c: New test.
> * gcc.dg/tree-ssa/clz-complement-long-long.c: New test.
> * gcc.dg/tree-ssa/clz-complement-long.c: New test.
> * gcc.dg/tree-ssa/ctz-complement-char.c: New test.
> * gcc.dg/tree-ssa/ctz-complement-int.c: New test.
> * gcc.dg/tree-ssa/ctz-complement-long-long.c: New test.
> * gcc.dg/tree-ssa/ctz-complement-long.c: New test.
>
> ---
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
> new file mode 100644
> index 
> ..1a29ca52e42e50822e4e3213b2cb008b766d0318
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
> +
> +#define PREC (__CHAR_BIT__)
> +
> +int clz_complement_count1 (unsigned char b) {
> +int c = 0;
> +
> +while (b) {
> +   b >>= 1;
> +   c++;
> +}
> +if (c <= PREC)
> +  return 0;
> +else
> +  return 34567;
> +}
> +
> +int clz_complement_count2 (unsigned char b) {
> +int c = 0;
> +
> +while (b) {
> +   b >>= 1;
> +   c++;
> +}
> +if (c <= PREC - 1)
> +  return 0;
> +else
> +  return 76543;
> +}
> +
> +int ctz_complement_count1 (unsigned char b) {
> +int c = 0;
> +
> +while (b) {
> +   b <<= 1;
> +   c++;
> +}
> +if (c <= PREC)
> +  return 0;
> +else
> +  return 23456;
> +}
> +
> +int ctz_complement_count2 (unsigned char b) {
> +int c = 0;
> +
> +while (b) {
> +   b <<= 1;
> +   c++;
> +}
> +if (c <= PREC - 1)
> +  return 0;
> +else
> +  return 65432;
> +}
> +/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "23456" 0 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "65432" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
> new file mode 100644
> index 
> ..2ebe8fabcaf0ce88f3a6a46e9ba4ba79b7d3672e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
> @@ -0,0 +1,31 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target clz } */
> +/* { dg-options "-O2 

[PATCH 5/8 v2] middle-end: Add cltz_complement idiom recognition

2022-12-22 Thread Andrew Carlotti via Gcc-patches
On Thu, Nov 24, 2022 at 11:41:31AM +0100, Richard Biener wrote:
> Note we do have CTZ and CLZ
> optabs and internal functions - in case there's a HImode CLZ this
> could be elided.  More general you can avoid using the __builtin_
> functions with their fixed types in favor of using IFN_C[TL]Z which
> are type agnostic (but require optab support - you should be able
> to check this via direct_internal_fn_supported_p).

IFN support added. I've also renamed the defined_at_zero parameter to
define_at_zero, since this is a request for the expression to define it,
rather than a guarantee that it is already defined.

New patch below, bootstrapped and regression tested on
aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu - ok to merge?

---

This recognises patterns of the form:
while (n) { n >>= 1 }

This patch results in improved (but still suboptimal) codegen:

foo (unsigned int b) {
int c = 0;

while (b) {
b >>= 1;
c++;
}

return c;
}

foo:
.LFB11:
.cfi_startproc
cbz w0, .L3
clz w1, w0
tst x0, 1
mov w0, 32
sub w0, w0, w1
cselw0, w0, wzr, ne
ret

The conditional is unnecessary. phiopt could recognise a redundant csel
(using cond_removal_in_builtin_zero_pattern) when one of the inputs is a
clz call, but it cannot recognise the redunancy when the input is (e.g.)
(32 - clz).

I could perhaps extend this function to recognise this pattern in a later
patch, if this is a good place to recognise more patterns.

gcc/ChangeLog:

PR tree-optimization/94793
* tree-scalar-evolution.cc (expression_expensive_p): Add checks
for c[lt]z optabs.
* tree-ssa-loop-niter.cc (build_cltz_expr): New.
(number_of_iterations_cltz_complement): New.
(number_of_iterations_bitcount): Add call to the above.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp (check_effective_target_clz)
(check_effective_target_clzl, check_effective_target_clzll)
(check_effective_target_ctz, check_effective_target_clzl)
(check_effective_target_ctzll): New.
* gcc.dg/tree-ssa/cltz-complement-max.c: New test.
* gcc.dg/tree-ssa/clz-complement-char.c: New test.
* gcc.dg/tree-ssa/clz-complement-int.c: New test.
* gcc.dg/tree-ssa/clz-complement-long-long.c: New test.
* gcc.dg/tree-ssa/clz-complement-long.c: New test.
* gcc.dg/tree-ssa/ctz-complement-char.c: New test.
* gcc.dg/tree-ssa/ctz-complement-int.c: New test.
* gcc.dg/tree-ssa/ctz-complement-long-long.c: New test.
* gcc.dg/tree-ssa/ctz-complement-long.c: New test.

---

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
new file mode 100644
index 
..1a29ca52e42e50822e4e3213b2cb008b766d0318
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
+
+#define PREC (__CHAR_BIT__)
+
+int clz_complement_count1 (unsigned char b) {
+int c = 0;
+
+while (b) {
+   b >>= 1;
+   c++;
+}
+if (c <= PREC)
+  return 0;
+else
+  return 34567;
+}
+
+int clz_complement_count2 (unsigned char b) {
+int c = 0;
+
+while (b) {
+   b >>= 1;
+   c++;
+}
+if (c <= PREC - 1)
+  return 0;
+else
+  return 76543;
+}
+
+int ctz_complement_count1 (unsigned char b) {
+int c = 0;
+
+while (b) {
+   b <<= 1;
+   c++;
+}
+if (c <= PREC)
+  return 0;
+else
+  return 23456;
+}
+
+int ctz_complement_count2 (unsigned char b) {
+int c = 0;
+
+while (b) {
+   b <<= 1;
+   c++;
+}
+if (c <= PREC - 1)
+  return 0;
+else
+  return 65432;
+}
+/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "23456" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "65432" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c 
b/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
new file mode 100644
index 
..2ebe8fabcaf0ce88f3a6a46e9ba4ba79b7d3672e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-require-effective-target clz } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#define PREC (__CHAR_BIT__)
+
+int
+__attribute__ ((noinline, noclone))
+foo (unsigned char b) {
+int c = 0;
+
+while (b) {
+   b >>= 1;
+   c++;
+}
+
+return c;
+}
+
+int main()
+{
+  if (foo(0) != 0)
+__builtin_abort ();
+  if (foo(5) != 3)
+__builtin_abort ();
+  if (foo(255) != 8)
+__builtin_abort ();
+  return 0;
+}
+
+/* { dg-final {