On Fri, May 10, 2024 at 12:55 PM Di Zhao OS
<diz...@os.amperecomputing.com> wrote:
>
> This patch tries to fix pr114760 by checking for the
> variants explicitly. When recognizing bit counting idiom,
> include pattern "x * 2" for "x << 1", and "x / 2" for
> "x >> 1" (given x is unsigned).
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
> Thanks,
> Di Zhao
>
> ---
>
> gcc/ChangeLog:
>         PR tree-optimization/114760
>         * tree-ssa-loop-niter.cc (is_lshift_by_1): New function
>         to check if STMT is equivalent to x << 1.
>         (is_rshift_by_1): New function to check if STMT is
>         equivalent to x >> 1.
>         (number_of_iterations_cltz): Enhance the identification
>         of logical shift by one.
>         (number_of_iterations_cltz_complement): Enhance the
>         identification of logical shift by one.
>
> gcc/testsuite/ChangeLog:
>         PR tree-optimization/114760
>         * gcc.dg/tree-ssa/pr114760-1.c: New test.
>         * gcc.dg/tree-ssa/pr114760-2.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 ++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 +++++++
>  gcc/tree-ssa-loop-niter.cc                 | 56 +++++++++++++-----
>  3 files changed, 131 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
> new file mode 100644
> index 00000000000..9f10ccc3b51
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
> @@ -0,0 +1,69 @@
> +/* PR tree-optimization/114760 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target clz } */
> +/* { dg-require-effective-target ctz } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +unsigned
> +ntz32_1 (unsigned x)
> +{
> +  int n = 32;
> +  while (x != 0)
> +    {
> +      n = n - 1;
> +      x = x * 2;
> +    }
> +  return n;
> +}
> +
> +unsigned
> +ntz32_2 (unsigned x)
> +{
> +  int n = 32;
> +  while (x != 0)
> +    {
> +      n = n - 1;
> +      x = x + x;
> +    }
> +  return n;
> +}
> +
> +unsigned
> +ntz32_3 (unsigned x)
> +{
> +  int n = 32;
> +  while (x != 0)
> +    {
> +      n = n - 1;
> +      x = x << 1;
> +    }
> +  return n;
> +}
> +
> +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
> +int
> +nlz32_1 (unsigned int b) {
> +    int c = PREC;
> +
> +    while (b != 0) {
> +       b >>= 1;
> +       c --;
> +    }
> +
> +    return c;
> +}
> +
> +int
> +nlz32_2 (unsigned int b) {
> +    int c = PREC;
> +
> +    while (b != 0) {
> +       b /= 2;
> +       c --;
> +    }
> +
> +    return c;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 "optimized" } 
> } */
> +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } 
> } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
> new file mode 100644
> index 00000000000..e1b4c4b1338
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
> @@ -0,0 +1,20 @@
> +/* PR tree-optimization/114760 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target clz } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +// Check that for signed type, there's no CLZ.
> +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
> +int
> +no_nlz32 (int b) {
> +    int c = PREC;
> +
> +    while (b != 0) {
> +       b /= 2;
> +       c --;
> +    }
> +
> +    return c;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } */
> \ No newline at end of file
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index 0fde07e626f..1d99264949b 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool 
> define_at_zero)
>    return call;
>  }
>
> +/* Returns true if STMT is equivalent to x << 1.  */
> +
> +static bool
> +is_lshift_by_1 (gimple *stmt)

You are checking for gimple-assign before calling these so please
use a 'gassign *' typed argument.

> +{
> +  if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR
> +      && integer_onep (gimple_assign_rhs2 (stmt)))
> +    return true;
> +  if (gimple_assign_rhs_code (stmt) == MULT_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
> +      && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)

You need to check for tree_fits_shwi_p (wich also checks for INTEGER_CST)
before using tree_to_shwi.

Ok with this change (to both functions).

Thanks,
Richard.

> +    return true;
> +  return false;
> +}
> +
> +/* Returns true if STMT is equivalent to x >> 1.  */
> +
> +static bool
> +is_rshift_by_1 (gimple *stmt)
> +{
> +  if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt))))
> +    return false;
> +  if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR
> +      && integer_onep (gimple_assign_rhs2 (stmt)))
> +    return true;
> +  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
> +      && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
> +    return true;
> +  return false;
> +}
> +
>  /* See comment below for number_of_iterations_bitcount.
>     For c[lt]z, we have:
>
> @@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit,
>
>    /* Make sure iv_2_stmt is a logical shift by one stmt:
>       iv_2 = iv_1 {<<|>>} 1  */
> -  if (!is_gimple_assign (iv_2_stmt)
> -      || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
> -         && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
> -             || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
> -      || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
> +  if (!is_gimple_assign (iv_2_stmt))
> +    return false;
> +  bool left_shift = false;
> +  if (!((left_shift = is_lshift_by_1 (iv_2_stmt))
> +       || is_rshift_by_1 (iv_2_stmt)))
>      return false;
> -
> -  bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
>
>    tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
>
> @@ -2516,14 +2546,12 @@ number_of_iterations_cltz_complement (loop_p loop, 
> edge exit,
>
>    /* Make sure iv_2_stmt is a logical shift by one stmt:
>       iv_2 = iv_1 {>>|<<} 1  */
> -  if (!is_gimple_assign (iv_2_stmt)
> -      || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
> -         && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
> -             || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
> -      || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
> +  if (!is_gimple_assign (iv_2_stmt))
> +    return false;
> +  bool left_shift = false;
> +  if (!((left_shift = is_lshift_by_1 (iv_2_stmt))
> +       || is_rshift_by_1 (iv_2_stmt)))
>      return false;
> -
> -  bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
>
>    tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
>
> --
> 2.25.1
>
>
>

Reply via email to