On Fri, May 22, 2026 at 12:14 AM liuhongt <[email protected]> wrote:
>
> > Can you format it the same as the FFS pattern though:
> > Changed; it now uses 1 space for indentation instead of 2.
>
> > And add a comment saying:
> > `popcount (X ^ (X - 1))` is the same as ffs(x) when x is nonzero, and
> > using ctz+1 will generate better code.
> Added.
>
> > Please add a generic testcase and not just a x86_64 specific one.
> > In this case you can use ctz target supports which checks if ctz will
> > cause a call or not.
> Added a new testcase under gcc.dg
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

Ok.

>
> When X is nonzero, X ^ (X - 1) produces a mask of trailing zeros plus
> the lowest set bit, so popcount of that expression equals ctz(X) + 1.
>
> gcc/ChangeLog:
>
>         PR middle-end/124630
>         * match.pd (popcount (x ^ (x - 1))): Fold to ctz (x) + 1 when
>         x is nonzero and CTZ is directly supported.
>
> gcc/testsuite/ChangeLog:
>
>         PR middle-end/124630
>         * gcc.dg/pr124630.c: New test.
>         * gcc.target/i386/pr124630.c: New test.
> ---
>  gcc/match.pd                             | 10 ++++++++++
>  gcc/testsuite/gcc.dg/pr124630.c          | 16 ++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr124630.c | 12 ++++++++++++
>  3 files changed, 38 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr124630.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr124630.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index b037b1a2876..298b769fcc0 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10377,6 +10377,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        (BUILT_IN_POPCOUNT (convert:type0 @0))
>        (if (cfn == CFN_BUILT_IN_POPCOUNTLL)
>         (BUILT_IN_POPCOUNTLL (convert:type0 @0))))))))
> +
> +/* popcount (X ^ (X - 1)) is the same as ffs(x) when x is nonzero, and
> +   using ctz+1 will generate better code.  */
> +(simplify
> + (POPCOUNT (bit_xor:c tree_expr_nonzero_p@0 (plus @0 integer_minus_onep)))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +      && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
> +                                        OPTIMIZE_FOR_SPEED))
> +  (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> +   (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
>  #endif
>
>  /* PARITY simplifications.  */
> diff --git a/gcc/testsuite/gcc.dg/pr124630.c b/gcc/testsuite/gcc.dg/pr124630.c
> new file mode 100644
> index 00000000000..fde8ce97c67
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr124630.c
> @@ -0,0 +1,16 @@
> +/* PR middle-end/124630 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target ctz } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +unsigned
> +foo (unsigned a)
> +{
> +  if (a != 0)
> +    return __builtin_popcount (a ^ (a - 1)) - 1;
> +  else
> +    return __CHAR_BIT__ * __SIZEOF_INT__;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "__builtin_popcount|\\.POPCOUNT" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr124630.c b/gcc/testsuite/gcc.target/i386/pr124630.c
> new file mode 100644
> index 00000000000..440aede39fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr124630.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi" } */
> +/* { dg-final { scan-assembler-not "blsmsk" } } */
> +/* { dg-final { scan-assembler-not "cmove" } } */
> +
> +unsigned foo (unsigned a)
> +{
> +    if (a != 0)
> +     return __builtin_popcount (a ^ (a - 1)) - 1;
> +    else
> +     return 32;
> +}
> --
> 2.34.1
>

Reply via email to