On Fri, May 10, 2024 at 12:55 PM Di Zhao OS <diz...@os.amperecomputing.com> wrote: > > This patch tries to fix pr114760 by checking for the > variants explicitly. When recognizing bit counting idiom, > include pattern "x * 2" for "x << 1", and "x / 2" for > "x >> 1" (given x is unsigned). > > Bootstrapped and tested on x86_64-linux-gnu. > > Thanks, > Di Zhao > > --- > > gcc/ChangeLog: > PR tree-optimization/114760 > * tree-ssa-loop-niter.cc (is_lshift_by_1): New function > to check if STMT is equivalent to x << 1. > (is_rshift_by_1): New function to check if STMT is > equivalent to x >> 1. > (number_of_iterations_cltz): Enhance the identification > of logical shift by one. > (number_of_iterations_cltz_complement): Enhance the > identification of logical shift by one. > > gcc/testsuite/ChangeLog: > PR tree-optimization/114760 > * gcc.dg/tree-ssa/pr114760-1.c: New test. > * gcc.dg/tree-ssa/pr114760-2.c: New test. > --- > gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 ++++++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 +++++++ > gcc/tree-ssa-loop-niter.cc | 56 +++++++++++++----- > 3 files changed, 131 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > new file mode 100644 > index 00000000000..9f10ccc3b51 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c > @@ -0,0 +1,69 @@ > +/* PR tree-optimization/114760 */ > +/* { dg-do compile } */ > +/* { dg-require-effective-target clz } */ > +/* { dg-require-effective-target ctz } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +unsigned > +ntz32_1 (unsigned x) > +{ > + int n = 32; > + while (x != 0) > + { > + n = n - 1; > + x = x * 2; > + } > + return n; > +} > + > +unsigned > +ntz32_2 (unsigned x) > +{ > + int n = 32; > + while (x != 0) > + { > + n = n - 1; > + x = x + x; > + } > + return n; > +} > + > +unsigned > +ntz32_3 (unsigned x) > +{ > + int n = 32; > + while (x != 0) > + { > + n = n - 1; > + x = x << 1; > + } > + return n; > +} > + > +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) > +int > +nlz32_1 (unsigned int b) { > + int c = PREC; > + > + while (b != 0) { > + b >>= 1; > + c --; > + } > + > + return c; > +} > + > +int > +nlz32_2 (unsigned int b) { > + int c = PREC; > + > + while (b != 0) { > + b /= 2; > + c --; > + } > + > + return c; > +} > + > +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 "optimized" } > } */ > +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } > } */ > \ No newline at end of file > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > new file mode 100644 > index 00000000000..e1b4c4b1338 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c > @@ -0,0 +1,20 @@ > +/* PR tree-optimization/114760 */ > +/* { dg-do compile } */ > +/* { dg-require-effective-target clz } */ > +/* { dg-options "-O3 -fdump-tree-optimized" } */ > + > +// Check that for signed type, there's no CLZ. > +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) > +int > +no_nlz32 (int b) { > + int c = PREC; > + > + while (b != 0) { > + b /= 2; > + c --; > + } > + > + return c; > +} > + > +/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } */ > \ No newline at end of file > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > index 0fde07e626f..1d99264949b 100644 > --- a/gcc/tree-ssa-loop-niter.cc > +++ b/gcc/tree-ssa-loop-niter.cc > @@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool > define_at_zero) > return call; > } > > +/* Returns true if STMT is equivalent to x << 1. */ > + > +static bool > +is_lshift_by_1 (gimple *stmt)
You are checking for gimple-assign before calling these so please use a 'gassign *' typed argument. > +{ > + if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR > + && integer_onep (gimple_assign_rhs2 (stmt))) > + return true; > + if (gimple_assign_rhs_code (stmt) == MULT_EXPR > + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST > + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) You need to check for tree_fits_shwi_p (wich also checks for INTEGER_CST) before using tree_to_shwi. Ok with this change (to both functions). Thanks, Richard. > + return true; > + return false; > +} > + > +/* Returns true if STMT is equivalent to x >> 1. */ > + > +static bool > +is_rshift_by_1 (gimple *stmt) > +{ > + if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt)))) > + return false; > + if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR > + && integer_onep (gimple_assign_rhs2 (stmt))) > + return true; > + if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR > + && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST > + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) > + return true; > + return false; > +} > + > /* See comment below for number_of_iterations_bitcount. > For c[lt]z, we have: > > @@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit, > > /* Make sure iv_2_stmt is a logical shift by one stmt: > iv_2 = iv_1 {<<|>>} 1 */ > - if (!is_gimple_assign (iv_2_stmt) > - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR > - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR > - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt))))) > - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) > + if (!is_gimple_assign (iv_2_stmt)) > + return false; > + bool left_shift = false; > + if (!((left_shift = is_lshift_by_1 (iv_2_stmt)) > + || is_rshift_by_1 (iv_2_stmt))) > return false; > - > - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); > > tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); > > @@ -2516,14 +2546,12 @@ number_of_iterations_cltz_complement (loop_p loop, > edge exit, > > /* Make sure iv_2_stmt is a logical shift by one stmt: > iv_2 = iv_1 {>>|<<} 1 */ > - if (!is_gimple_assign (iv_2_stmt) > - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR > - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR > - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt))))) > - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) > + if (!is_gimple_assign (iv_2_stmt)) > + return false; > + bool left_shift = false; > + if (!((left_shift = is_lshift_by_1 (iv_2_stmt)) > + || is_rshift_by_1 (iv_2_stmt))) > return false; > - > - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); > > tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); > > -- > 2.25.1 > > >