https://gcc.gnu.org/g:421f09e24c9c82a9ff6a61dd0b65687175c90bff

commit r17-709-g421f09e24c9c82a9ff6a61dd0b65687175c90bff
Author: liuhongt <[email protected]>
Date:   Thu May 14 02:06:43 2026 -0700

    match.pd: Fold popcount(x ^ (x - 1)) to ctz(x) + 1 [PR124630]
    
    When X is nonzero, X ^ (X - 1) produces a mask of trailing zeros plus
    the lowest set bit, so popcount of that expression equals ctz(X) + 1.
    
    gcc/ChangeLog:
    
            PR middle-end/124630
            * match.pd (popcount (x ^ (x - 1))): Fold to ctz (x) + 1 when
            x is nonzero and CTZ is directly supported.
    
    gcc/testsuite/ChangeLog:
    
            PR middle-end/124630
            * gcc.dg/pr124630.c: New test.
            * gcc.target/i386/pr124630.c: New test.

Diff:
---
 gcc/match.pd                             | 10 ++++++++++
 gcc/testsuite/gcc.dg/pr124630.c          | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr124630.c | 12 ++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index d7704e78851d..0be4eff818b2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10424,6 +10424,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (BUILT_IN_POPCOUNT (convert:type0 @0))
       (if (cfn == CFN_BUILT_IN_POPCOUNTLL)
        (BUILT_IN_POPCOUNTLL (convert:type0 @0))))))))
+
+/* popcount (X ^ (X - 1)) is the same as ffs (X) when X is nonzero, and
+   using ctz (X) + 1 will generate better code.  */
+(simplify
+ (POPCOUNT (bit_xor:c tree_expr_nonzero_p@0 (plus @0 integer_minus_onep)))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+                                        OPTIMIZE_FOR_SPEED))
+  (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+   (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
 #endif
 
 /* PARITY simplifications.  */
diff --git a/gcc/testsuite/gcc.dg/pr124630.c b/gcc/testsuite/gcc.dg/pr124630.c
new file mode 100644
index 000000000000..fde8ce97c67f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124630.c
@@ -0,0 +1,16 @@
+/* PR middle-end/124630 */
+/* { dg-do compile } */
+/* { dg-require-effective-target ctz } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned
+foo (unsigned a)
+{
+  if (a != 0)
+    return __builtin_popcount (a ^ (a - 1)) - 1;
+  else
+    return __CHAR_BIT__ * __SIZEOF_INT__;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_popcount|\\.POPCOUNT" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr124630.c b/gcc/testsuite/gcc.target/i386/pr124630.c
new file mode 100644
index 000000000000..440aede39fa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr124630.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+/* { dg-final { scan-assembler-not "blsmsk" } } */
+/* { dg-final { scan-assembler-not "cmove" } } */
+
+unsigned foo (unsigned a)
+{
+    if (a != 0)
+     return __builtin_popcount (a ^ (a - 1)) - 1;
+    else
+     return 32;
+}

Reply via email to