On Fri, 17 Nov 2023, Jakub Jelinek wrote:

> Hi!
> 
> ctz(ext(X)) is the same as ctz(X) in the UB on zero case (or could be also
> in the 2 argument case on large BITINT_TYPE by preserving the argument, not
> implemented in this patch),
> popcount(zext(X)) is the same as popcount(X),
> parity(zext(X)) is the same as parity(X),
> parity(sext(X)) is the same as parity(X) provided the bit difference between
> the extended and unextended types is even,
> ffs(ext(X)) is the same as ffs(X).
> 
> The following patch optimizes those in match.pd if those are beneficial
> (always in the large BITINT_TYPE case, or if the narrower type has optab
> and the wider doesn't, or the wider is larger than word and narrower is
> one of the standard argument sizes (tested just int and long long, as
> long is on most targets same bitsize as one of those two).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> Joseph in the PR mentioned that ctz(narrow(X)) is the same as ctz(X)
> if UB on 0, but that can be handled incrementally (and would need different
> decisions when it is profitable).
> And clz(zext(X)) is clz(X) + bit_difference, but not sure we want to change
> that in match.pd at all, perhaps during insn selection?

Yeah, it would be less canonical for GIMPLE I think.

Richard.

> 2023-11-17  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/112566
>       PR tree-optimization/83171
>       * match.pd (ctz(ext(X)) -> ctz(X), popcount(zext(X)) -> popcount(X),
>       parity(ext(X)) -> parity(X), ffs(ext(X)) -> ffs(X)): New
>       simplifications.
>       ( __builtin_ffs (X) == 0 -> X == 0): Use FFS rather than
>       BUILT_IN_FFS BUILT_IN_FFSL BUILT_IN_FFSLL BUILT_IN_FFSIMAX.
> 
>       * gcc.dg/pr112566-1.c: New test.
>       * gcc.dg/pr112566-2.c: New test.
>       * gcc.target/i386/pr78057.c (foo): Pass another long long argument
>       and use it in __builtin_ia32_*zcnt_u64 instead of the int one.
> 
> --- gcc/match.pd.jj   2023-11-14 10:52:16.190276041 +0100
> +++ gcc/match.pd      2023-11-16 13:09:41.221275374 +0100
> @@ -8672,6 +8672,50 @@ (define_operator_list SYNC_FETCH_AND_AND
>                              wi::shifted_mask (tree_to_uhwi (@1), 1,
>                                                false, prec)); })))))))
>  
> +#if GIMPLE
> +/* ctz(ext(X)) == ctz(X).  Valid just for the UB at zero cases though.  */
> +(simplify
> +  (CTZ (convert@1 @0))
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0)))
> +   (with { combined_fn cfn = CFN_LAST;
> +        tree type0 = TREE_TYPE (@0);
> +        if (TREE_CODE (type0) == BITINT_TYPE)
> +          {
> +            if (TYPE_PRECISION (type0) > MAX_FIXED_MODE_SIZE)
> +              cfn = CFN_CTZ;
> +            else
> +              type0
> +                = build_nonstandard_integer_type (TYPE_PRECISION (type0),
> +                                                  1);
> +          }
> +        type0 = unsigned_type_for (type0);
> +        if (cfn == CFN_LAST
> +            && direct_internal_fn_supported_p (IFN_CTZ, type0,
> +                                               OPTIMIZE_FOR_BOTH))
> +          cfn = CFN_CTZ;
> +        if (cfn == CFN_LAST
> +            && TYPE_PRECISION (TREE_TYPE (@1)) > BITS_PER_WORD
> +            && !direct_internal_fn_supported_p (IFN_CTZ,
> +                                                TREE_TYPE (@1),
> +                                                OPTIMIZE_FOR_BOTH))
> +          {
> +            if (TYPE_PRECISION (type0)
> +                == TYPE_PRECISION (unsigned_type_node))
> +              cfn = CFN_BUILT_IN_CTZ;
> +            else if (TYPE_PRECISION (type0)
> +                     == TYPE_PRECISION (long_long_unsigned_type_node))
> +              cfn = CFN_BUILT_IN_CTZLL;
> +          } }
> +    (if (cfn == CFN_CTZ)
> +     (IFN_CTZ (convert:type0 @0))
> +     (if (cfn == CFN_BUILT_IN_CTZ)
> +      (BUILT_IN_CTZ (convert:type0 @0))
> +      (if (cfn == CFN_BUILT_IN_CTZLL)
> +       (BUILT_IN_CTZLL (convert:type0 @0))))))))
> +#endif
> +
>  /* POPCOUNT simplifications.  */
>  /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero.  */
>  (simplify
> @@ -8742,6 +8786,50 @@ (define_operator_list SYNC_FETCH_AND_AND
>             (popcount:s @1))
>        (popcount (log2 @0 @1)))))
>  
> +#if GIMPLE
> +/* popcount(zext(X)) == popcount(X).  */
> +(simplify
> +  (POPCOUNT (convert@1 @0))
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && TYPE_UNSIGNED (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0)))
> +   (with { combined_fn cfn = CFN_LAST;
> +        tree type0 = TREE_TYPE (@0);
> +        if (TREE_CODE (type0) == BITINT_TYPE)
> +          {
> +            if (TYPE_PRECISION (type0) > MAX_FIXED_MODE_SIZE)
> +              cfn = CFN_POPCOUNT;
> +            else
> +              type0
> +                = build_nonstandard_integer_type (TYPE_PRECISION (type0),
> +                                                  1);
> +          }
> +        if (cfn == CFN_LAST
> +            && direct_internal_fn_supported_p (IFN_POPCOUNT, type0,
> +                                               OPTIMIZE_FOR_BOTH))
> +          cfn = CFN_POPCOUNT;
> +        if (cfn == CFN_LAST
> +            && TYPE_PRECISION (TREE_TYPE (@1)) > BITS_PER_WORD
> +            && !direct_internal_fn_supported_p (IFN_POPCOUNT,
> +                                                TREE_TYPE (@1),
> +                                                OPTIMIZE_FOR_BOTH))
> +          {
> +            if (TYPE_PRECISION (type0)
> +                == TYPE_PRECISION (unsigned_type_node))
> +              cfn = CFN_BUILT_IN_POPCOUNT;
> +            else if (TYPE_PRECISION (type0)
> +                     == TYPE_PRECISION (long_long_unsigned_type_node))
> +              cfn = CFN_BUILT_IN_POPCOUNTLL;
> +          } }
> +    (if (cfn == CFN_POPCOUNT)
> +     (IFN_POPCOUNT (convert:type0 @0))
> +     (if (cfn == CFN_BUILT_IN_POPCOUNT)
> +      (BUILT_IN_POPCOUNT (convert:type0 @0))
> +      (if (cfn == CFN_BUILT_IN_POPCOUNTLL)
> +       (BUILT_IN_POPCOUNTLL (convert:type0 @0))))))))
> +#endif
> +
>  /* PARITY simplifications.  */
>  /* parity(~X) is parity(X).  */
>  (simplify
> @@ -8780,6 +8868,54 @@ (define_operator_list SYNC_FETCH_AND_AND
>    (bit_xor (PARITY:s @0) (PARITY:s @1))
>    (PARITY (bit_xor @0 @1)))
>  
> +#if GIMPLE
> +/* parity(zext(X)) == parity(X).  */
> +/* parity(sext(X)) == parity(X) if the difference in precision is even.  */
> +(simplify
> +  (PARITY (convert@1 @0))
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0))
> +       && (TYPE_UNSIGNED (TREE_TYPE (@0))
> +        || ((TYPE_PRECISION (TREE_TYPE (@1))
> +             - TYPE_PRECISION (TREE_TYPE (@0))) & 1) == 0))
> +   (with { combined_fn cfn = CFN_LAST;
> +        tree type0 = TREE_TYPE (@0);
> +        if (TREE_CODE (type0) == BITINT_TYPE)
> +          {
> +            if (TYPE_PRECISION (type0) > MAX_FIXED_MODE_SIZE)
> +              cfn = CFN_PARITY;
> +            else
> +              type0
> +                = build_nonstandard_integer_type (TYPE_PRECISION (type0),
> +                                                  1);
> +          }
> +        type0 = unsigned_type_for (type0);
> +        if (cfn == CFN_LAST
> +            && direct_internal_fn_supported_p (IFN_PARITY, type0,
> +                                               OPTIMIZE_FOR_BOTH))
> +          cfn = CFN_PARITY;
> +        if (cfn == CFN_LAST
> +            && TYPE_PRECISION (TREE_TYPE (@1)) > BITS_PER_WORD
> +            && !direct_internal_fn_supported_p (IFN_PARITY,
> +                                                TREE_TYPE (@1),
> +                                                OPTIMIZE_FOR_BOTH))
> +          {
> +            if (TYPE_PRECISION (type0)
> +                == TYPE_PRECISION (unsigned_type_node))
> +              cfn = CFN_BUILT_IN_PARITY;
> +            else if (TYPE_PRECISION (type0)
> +                     == TYPE_PRECISION (long_long_unsigned_type_node))
> +              cfn = CFN_BUILT_IN_PARITYLL;
> +          } }
> +    (if (cfn == CFN_PARITY)
> +     (IFN_PARITY (convert:type0 @0))
> +     (if (cfn == CFN_BUILT_IN_PARITY)
> +      (BUILT_IN_PARITY (convert:type0 @0))
> +      (if (cfn == CFN_BUILT_IN_PARITYLL)
> +       (BUILT_IN_PARITYLL (convert:type0 @0))))))))
> +#endif
> +
>  /* a != 0 ? FUN(a) : 0 -> Fun(a) for some builtin functions. */
>  (for func (POPCOUNT BSWAP FFS PARITY)
>   (simplify
> @@ -8987,8 +9123,7 @@ (define_operator_list SYNC_FETCH_AND_AND
>     (plus (CTZ:type (convert:utype @0)) { build_one_cst (type); }))))
>  #endif
>  
> -(for ffs (BUILT_IN_FFS BUILT_IN_FFSL BUILT_IN_FFSLL
> -       BUILT_IN_FFSIMAX)
> +(for ffs (FFS)
>   /* __builtin_ffs (X) == 0 -> X == 0.
>      __builtin_ffs (X) == 6 -> (X & 63) == 32.  */
>   (for cmp (eq ne)
> @@ -9036,6 +9171,50 @@ (define_operator_list SYNC_FETCH_AND_AND
>                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
>  
>  #if GIMPLE
> +/* ffs(ext(X)) == ffs(X).  */
> +(simplify
> +  (FFS (convert@1 @0))
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@1)) > TYPE_PRECISION (TREE_TYPE (@0)))
> +   (with { combined_fn cfn = CFN_LAST;
> +        tree type0 = TREE_TYPE (@0);
> +        if (TREE_CODE (type0) == BITINT_TYPE)
> +          {
> +            if (TYPE_PRECISION (type0) > MAX_FIXED_MODE_SIZE)
> +              cfn = CFN_FFS;
> +            else
> +              type0
> +                = build_nonstandard_integer_type (TYPE_PRECISION (type0),
> +                                                  0);
> +          }
> +        type0 = signed_type_for (type0);
> +        if (cfn == CFN_LAST
> +            && direct_internal_fn_supported_p (IFN_FFS, type0,
> +                                               OPTIMIZE_FOR_BOTH))
> +          cfn = CFN_FFS;
> +        if (cfn == CFN_LAST
> +            && TYPE_PRECISION (TREE_TYPE (@1)) > BITS_PER_WORD
> +            && !direct_internal_fn_supported_p (IFN_FFS,
> +                                                TREE_TYPE (@1),
> +                                                OPTIMIZE_FOR_BOTH))
> +          {
> +            if (TYPE_PRECISION (type0)
> +                == TYPE_PRECISION (integer_type_node))
> +              cfn = CFN_BUILT_IN_FFS;
> +            else if (TYPE_PRECISION (type0)
> +                     == TYPE_PRECISION (long_long_integer_type_node))
> +              cfn = CFN_BUILT_IN_FFSLL;
> +          } }
> +    (if (cfn == CFN_FFS)
> +     (IFN_FFS (convert:type0 @0))
> +     (if (cfn == CFN_BUILT_IN_FFS)
> +      (BUILT_IN_FFS (convert:type0 @0))
> +      (if (cfn == CFN_BUILT_IN_FFSLL)
> +       (BUILT_IN_FFSLL (convert:type0 @0))))))))
> +#endif
> +
> +#if GIMPLE
>  
>  /* Simplify:
>       a = op a1
> --- gcc/testsuite/gcc.dg/pr112566-1.c.jj      2023-11-16 13:29:23.063761320 
> +0100
> +++ gcc/testsuite/gcc.dg/pr112566-1.c 2023-11-16 13:35:14.459852714 +0100
> @@ -0,0 +1,14 @@
> +/* PR tree-optimization/112566 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mbmi2 -mlzcnt -mpopcnt" { target i?86-*-* 
> x86_64-*-* } } */
> +/* { dg-final { scan-tree-dump-not "ll \\\(" "optimized" { target ia32 } } } 
> */
> +/* { dg-final { scan-tree-dump-not "\\\(long long (unsigned )?int\\\)" 
> "optimized" { target ia32 } } } */
> +
> +int foo (unsigned int x) { return __builtin_ctzll (x); }
> +int bar (unsigned int x) { return __builtin_popcountll (x); }
> +int baz (unsigned int x) { return __builtin_parityll (x); }
> +int qux (int x) { return __builtin_ffsll (x); }
> +int corge (int x) { return __builtin_ctzll (x); }
> +int garply (int x) { return __builtin_parityll (x); }
> +int fred (unsigned int x) { return __builtin_ffsll (x); }
> --- gcc/testsuite/gcc.dg/pr112566-2.c.jj      2023-11-16 13:32:30.854138101 
> +0100
> +++ gcc/testsuite/gcc.dg/pr112566-2.c 2023-11-16 13:34:14.727687104 +0100
> @@ -0,0 +1,12 @@
> +/* PR tree-optimization/112566 */
> +/* { dg-do compile { target bitint575 } } */
> +/* { dg-options "-O2 -fdump-tree-ccp2" } */
> +/* { dg-final { scan-tree-dump-not "\\\((unsigned )?_BitInt\\\(512\\\)\\\)" 
> "ccp2" } } */
> +
> +int foo (unsigned _BitInt(256) x) { return __builtin_ctzg ((unsigned 
> _BitInt(512)) x); }
> +int bar (unsigned _BitInt(256) x) { return __builtin_popcountg ((unsigned 
> _BitInt(512)) x); }
> +int baz (unsigned _BitInt(256) x) { return __builtin_parityg ((unsigned 
> _BitInt(512)) x); }
> +int qux (_BitInt(256) x) { return __builtin_ffsg ((_BitInt(512)) x); }
> +int corge (_BitInt(256) x) { return __builtin_ctzg ((unsigned _BitInt(512)) 
> x); }
> +int garply (_BitInt(256) x) { return __builtin_parityg ((unsigned 
> _BitInt(512)) x); }
> +int fred (unsigned _BitInt(256) x) { return __builtin_ffsg ((_BitInt(512)) 
> x); }
> --- gcc/testsuite/gcc.target/i386/pr78057.c.jj        2020-01-14 
> 20:02:47.876593462 +0100
> +++ gcc/testsuite/gcc.target/i386/pr78057.c   2023-11-17 14:27:39.120666182 
> +0100
> @@ -5,7 +5,7 @@
>  extern void link_error (void);
>  
>  int
> -foo (int x)
> +foo (int x, long long y)
>  {
>    if (__builtin_ia32_tzcnt_u16 (16) != 4
>        || __builtin_ia32_tzcnt_u16 (0) != 16
> @@ -24,13 +24,14 @@ foo (int x)
>       )
>      link_error ();
>    x += 2;
> -  if (x == 0)
> +  y += 2;
> +  if (x == 0 || y == 0)
>      return 5;
>    return __builtin_ia32_tzcnt_u32 (x)
>           + __builtin_ia32_lzcnt_u32 (x)
>  #ifdef __x86_64__
> -      + __builtin_ia32_tzcnt_u64 (x)
> -      + __builtin_ia32_lzcnt_u64 (x)
> +      + __builtin_ia32_tzcnt_u64 (y)
> +      + __builtin_ia32_lzcnt_u64 (y)
>  #endif
>        ;
>  }
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to