https://gcc.gnu.org/g:421f09e24c9c82a9ff6a61dd0b65687175c90bff
commit r17-709-g421f09e24c9c82a9ff6a61dd0b65687175c90bff
Author: liuhongt <[email protected]>
Date: Thu May 14 02:06:43 2026 -0700

    match.pd: Fold popcount(x ^ (x - 1)) to ctz(x) + 1 [PR124630]

    When X is nonzero, X ^ (X - 1) produces a mask of trailing zeros plus
    the lowest set bit, so popcount of that expression equals ctz(X) + 1.

    gcc/ChangeLog:

            PR middle-end/124630
            * match.pd (popcount (x ^ (x - 1))): Fold to ctz (x) + 1 when x
            is nonzero and CTZ is directly supported.

    gcc/testsuite/ChangeLog:

            PR middle-end/124630
            * gcc.dg/pr124630.c: New test.
            * gcc.target/i386/pr124630.c: New test.

Diff:
---
 gcc/match.pd                             | 10 ++++++++++
 gcc/testsuite/gcc.dg/pr124630.c          | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr124630.c | 12 ++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index d7704e78851d..0be4eff818b2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10424,6 +10424,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                  (BUILT_IN_POPCOUNT (convert:type0 @0))
                (if (cfn == CFN_BUILT_IN_POPCOUNTLL)
                  (BUILT_IN_POPCOUNTLL (convert:type0 @0))))))))
+
+/* popcount (X ^ (X - 1)) is the same as ffs(x) when x is nonzero, and
+   using ctz+1 will generate better code. */
+(simplify
+  (POPCOUNT (bit_xor:c tree_expr_nonzero_p@0 (plus @0 integer_minus_onep)))
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+                                          OPTIMIZE_FOR_SPEED))
+    (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+      (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
 #endif
 
 /* PARITY simplifications. */
diff --git a/gcc/testsuite/gcc.dg/pr124630.c b/gcc/testsuite/gcc.dg/pr124630.c
new file mode 100644
index 000000000000..fde8ce97c67f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124630.c
@@ -0,0 +1,16 @@
+/* PR middle-end/124630 */
+/* { dg-do compile } */
+/* { dg-require-effective-target ctz } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned
+foo (unsigned a)
+{
+  if (a != 0)
+    return __builtin_popcount (a ^ (a - 1)) - 1;
+  else
+    return __CHAR_BIT__ * __SIZEOF_INT__;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_popcount|\\.POPCOUNT" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr124630.c b/gcc/testsuite/gcc.target/i386/pr124630.c
new file mode 100644
index 000000000000..440aede39fa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr124630.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+/* { dg-final { scan-assembler-not "blsmsk" } } */
+/* { dg-final { scan-assembler-not "cmove" } } */
+
+unsigned foo (unsigned a)
+{
+  if (a != 0)
+    return __builtin_popcount (a ^ (a - 1)) - 1;
+  else
+    return 32;
+}
