When X is nonzero, X ^ (X - 1) is a mask covering the lowest set bit of X
and all of the trailing zero bits below it, so the popcount of that
expression equals ctz (X) + 1.  Folding to CTZ avoids the blsmsk + popcnt
sequence (and a cmove for the zero case) on targets with a direct CTZ.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

        PR middle-end/124630
        * match.pd (popcount (x ^ (x - 1))): Fold to ctz (x) + 1 when
        x is nonzero and CTZ is directly supported.

gcc/testsuite/ChangeLog:

        PR middle-end/124630
        * gcc.target/i386/pr124630.c: New test.
---
 gcc/match.pd                             | 11 +++++++++++
 gcc/testsuite/gcc.target/i386/pr124630.c | 12 ++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr124630.c

diff --git a/gcc/match.pd b/gcc/match.pd
index b037b1a2876..f963dcb0f58 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10377,6 +10377,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (BUILT_IN_POPCOUNT (convert:type0 @0))
       (if (cfn == CFN_BUILT_IN_POPCOUNTLL)
        (BUILT_IN_POPCOUNTLL (convert:type0 @0))))))))
+
+/* popcount (X ^ (X - 1)) is CTZ (X) + 1 when X is nonzero.  The constant is
+   built in TYPE, the type of the CTZ operand, so both PLUS operands agree.  */
+(simplify
+  (POPCOUNT (bit_xor:c tree_expr_nonzero_p@0 (plus @0 integer_minus_onep)))
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+                                         OPTIMIZE_FOR_SPEED))
+   (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+    (plus (CTZ:type (convert:utype @0))
+         { build_one_cst (type); }))))
 #endif
 
 /* PARITY simplifications.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr124630.c b/gcc/testsuite/gcc.target/i386/pr124630.c
new file mode 100644
index 00000000000..440aede39fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr124630.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+/* { dg-final { scan-assembler-not "blsmsk" } } */
+/* { dg-final { scan-assembler-not "cmove" } } */
+
+unsigned foo (unsigned a)
+{
+    if (a != 0)
+     return __builtin_popcount (a ^ (a - 1)) - 1;  /* ctz (a) for a != 0.  */
+    else
+     return 32;  /* Presumably the bit width of unsigned here; confirm.  */
+}
-- 
2.34.1

Reply via email to