This simple patch to match.pd optimizes away bit permutation
operations, specifically bswap and rotate, in calls to popcount and
parity.  Although this patch has been developed and tested on LP64,
it relies on there being no truncations or extensions to "marry up"
the appropriate PARITY, PARITYL and PARITYLL forms with either BSWAP32
or BSWAP64, assuming this transformation won't fire if the integral
types have different sizes.

The following patch has been tested on x86_64-pc-linux-gnu with
"make bootstrap" and "make -k check" with no new failures.
Ok for mainline?

2020-08-21  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * gcc/match.pd (popcount(bswapN(x)) -> popcount(x),
        popcount(rotate(x)) -> popcount(x), parity(bswapN(x)) -> parity(x),
        parity(rotate(x)) -> parity(x)): New simplifications.

gcc/testsuite/ChangeLog
        * gcc.dg/fold-popcount-6.c: New test.
        * gcc.dg/fold-popcount-7.c: New test.
        * gcc.dg/fold-parity-6.c: New test.
        * gcc.dg/fold-parity-7.c: New test.

Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

diff --git a/gcc/match.pd b/gcc/match.pd
index c3b8816..7e8a893 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6060,12 +6060,40 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bit_and (POPCOUNT @0) integer_onep)
   (PARITY @0))
 
+/* Rely on no conversion to marry POPCOUNT, POPCOUNTL and POPCOUNTLL.  */
+(simplify
+  (POPCOUNT (BUILT_IN_BSWAP32 @0))
+  (POPCOUNT @0))
+(simplify
+  (POPCOUNT (BUILT_IN_BSWAP64 @0))
+  (POPCOUNT @0))
+
+(for popcount (POPCOUNT)
+  (for rot (lrotate rrotate)
+    (simplify
+      (popcount (rot @0 @1))
+      (popcount @0))))
+
 /* PARITY simplifications.  */
 /* parity(~X) is parity(X).  */
 (simplify
   (PARITY (bit_not @0))
   (PARITY @0))
 
+/* Rely on no conversion to marry PARITY, PARITYL and PARITYLL.  */
+(simplify
+  (PARITY (BUILT_IN_BSWAP32 @0))
+  (PARITY @0))
+(simplify
+  (PARITY (BUILT_IN_BSWAP64 @0))
+  (PARITY @0))
+
+(for parity (PARITY)
+  (for rot (lrotate rrotate)
+    (simplify
+      (parity (rot @0 @1))
+      (parity @0))))
+
 /* parity(X)^parity(Y) is parity(X^Y).  */
 (simplify
   (bit_xor (PARITY:s @0) (PARITY:s @1))
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-6.c 
b/gcc/testsuite/gcc.dg/fold-popcount-6.c
new file mode 100644
index 0000000..37b55a1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo(unsigned int x)
+{
+  if (sizeof(unsigned int) == 4)
+    return __builtin_popcount (__builtin_bswap32(x));
+  return x;
+}
+
+unsigned int bar(unsigned long x)
+{
+  if (sizeof(unsigned long) == 8)
+    return __builtin_popcountl (__builtin_bswap64(x));
+  if (sizeof(unsigned long) == 4)
+    return __builtin_popcountl (__builtin_bswap32(x));
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-popcount-7.c 
b/gcc/testsuite/gcc.dg/fold-popcount-7.c
new file mode 100644
index 0000000..fdcefe1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-popcount-7.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo(unsigned int x)
+{
+  if (sizeof(unsigned int) == 4) {
+    unsigned int y = (x>>4) | (x<<28);
+    return __builtin_popcount(y);
+  } else return x;
+}
+
+unsigned int bar(unsigned long x)
+{
+  if (sizeof(unsigned long) == 8) {
+    unsigned long y = (x>>4) | (x<<60);
+    return __builtin_popcountl (y);
+  } else if(sizeof(unsigned long) == 4) {
+    unsigned long y = (x>>4) | (x<<28);
+    return __builtin_popcountl (y);
+  } else return (unsigned int)x;
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/fold-parity-6.c 
b/gcc/testsuite/gcc.dg/fold-parity-6.c
new file mode 100644
index 0000000..ece0048
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo(unsigned int x)
+{
+  if (sizeof(unsigned int) == 4)
+    return __builtin_parity (__builtin_bswap32(x));
+  return x;
+}
+
+unsigned int bar(unsigned long x)
+{
+  if (sizeof(unsigned long) == 8)
+    return __builtin_parityl (__builtin_bswap64(x));
+  if (sizeof(unsigned long) == 4)
+    return __builtin_parityl (__builtin_bswap32(x));
+  return (unsigned int)x;
+}
+
+/* { dg-final { scan-tree-dump-times "bswap" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-parity-7.c 
b/gcc/testsuite/gcc.dg/fold-parity-7.c
new file mode 100644
index 0000000..9b5085b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-parity-7.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo(unsigned int x)
+{
+  if (sizeof(unsigned int) == 4) {
+    unsigned int y = (x>>4) | (x<<28);
+    return __builtin_parity (y);
+  } else return x;
+}
+
+unsigned int bar(unsigned long x)
+{
+  if (sizeof(unsigned long) == 8) {
+    unsigned long y = (x>>4) | (x<<60);
+    return __builtin_parityl (y);
+  } else if (sizeof(unsigned long) == 4) {
+    unsigned long y = (x>>4) | (x<<28);
+    return __builtin_parityl (y);
+  } else return (unsigned int)x;
+}
+
+/* { dg-final { scan-tree-dump-times " r>> " 0 "optimized" } } */
+

Reply via email to