This patch complements one from June 12th which is still awaiting
review: https://gcc.gnu.org/pipermail/gcc-patches/2020-June/547937.html

This patch optimizes popcount and parity of an argument known to have
at most a single bit set, to be that single bit.  Hence, popcount(x&8)
is simplified to (x>>3)&1.   This generalizes the existing optimization
of popcount(x&1) being simplified to x&1, which is moved with this
patch to avoid a duplicate pattern warning in match.pd.

This patch has been tested on x86_64-pc-linux-gnu with a "make bootstrap"
and "make -k check" with no new failures.  If this is approved after
(or at the same time) as the patch above, I'm happy to resolve the
conflicts and retest before committing.


2020-07-20  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * match.pd (popcount(x) -> x>>C): New simplification.

gcc/testsuite
        * gcc.dg/fold-popcount-5.c: New test.
        * gcc.dg/fold-parity-5.c: Likewise.


Ok for mainline?
Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

diff --git a/gcc/match.pd b/gcc/match.pd
index c6ae7a7..0b3b626 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5966,11 +5966,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* POPCOUNT simplifications.  */
 (for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL
               BUILT_IN_POPCOUNTIMAX)
-  /* popcount(X&1) is nop_expr(X&1).  */
-  (simplify
-    (popcount @0)
-    (if (tree_nonzero_bits (@0) == 1)
-      (convert @0)))
   /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero.  */
   (simplify
     (plus (popcount:s @0) (popcount:s @1))
@@ -5983,6 +5978,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (cmp (popcount @0) integer_zerop)
       (rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
 
+/* popcount(X&C1) is (X>>C2)&1 when C1 == 1<<C2.  Same for parity(X&C1).  */
+(for pfun (BUILT_IN_POPCOUNT BUILT_IN_PARITY
+          BUILT_IN_POPCOUNTL BUILT_IN_PARITYL
+          BUILT_IN_POPCOUNTLL BUILT_IN_PARITYLL
+          BUILT_IN_POPCOUNTIMAX BUILT_IN_PARITYIMAX)
+  (simplify
+    (pfun @0)
+    (with { wide_int nz = tree_nonzero_bits (@0); }
+      (switch
+       (if (nz == 1)
+         (convert @0))
+       (if (wi::popcount (nz) == 1)
+         (with { tree utype = unsigned_type_for (TREE_TYPE (@0));
+                 int bits = wi::ctz (nz); }
+           (convert (rshift:utype (convert:utype @0)
+                                  { build_int_cst (utype, bits); }))))))))
+
 #if GIMPLE
 /* 64- and 32-bits branchless implementations of popcount are detected:
 
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-5.c 
b/gcc/testsuite/gcc.dg/fold-popcount-5.c
new file mode 100644
index 0000000..943726f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_and4(unsigned int a)
+{
+  return __builtin_popcount(a&4);
+}
+
+int test_and4l(unsigned long b)
+{
+  return __builtin_popcountl(b&4);
+}
+
+int test_and4ll(unsigned long long c)
+{
+  return __builtin_popcountll(c&4);
+}
+
+int test_shift(unsigned int d)
+{
+  int bits = 8*sizeof(unsigned int)-1;
+  return __builtin_popcount(d<<31);
+}
+
+int test_shiftl(unsigned long e)
+{
+  int bits = 8*sizeof(unsigned long)-1;
+  return __builtin_popcountl(e<<bits);
+}
+
+int test_shiftll(unsigned long long f)
+{
+  int bits = 8*sizeof(unsigned long long)-1;
+  return __builtin_popcountll(f<<bits);
+}
+
+/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-5.c 
b/gcc/testsuite/gcc.dg/fold-parity-5.c
new file mode 100644
index 0000000..69d3a6a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_and4(unsigned int a)
+{
+  return __builtin_parity(a&4);
+}
+
+int test_and4l(unsigned long b)
+{
+  return __builtin_parityl(b&4);
+}
+
+int test_and4ll(unsigned long long c)
+{
+  return __builtin_parityll(c&4);
+}
+
+int test_shift(unsigned int d)
+{
+  int bits = 8*sizeof(unsigned int)-1;
+  return __builtin_parity(d<<31);
+}
+
+int test_shiftl(unsigned long e)
+{
+  int bits = 8*sizeof(unsigned long)-1;
+  return __builtin_parityl(e<<bits);
+}
+
+int test_shiftll(unsigned long long f)
+{
+  int bits = 8*sizeof(unsigned long long)-1;
+  return __builtin_parityll(f<<bits);
+}
+
+/* { dg-final { scan-tree-dump-times "parity" 0 "optimized" } } */
+

Reply via email to