Re: [PATCH] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

Richard Biener Mon, 12 May 2025 06:06:32 -0700

On Mon, 12 May 2025, Icen Zeyada wrote:

>     Generalize existing scalar gimple_fold rules to apply the same
>     bitwise comparison simplifications to vector types.  Previously, an
>     expression like
> 
>         (x < y) && (x > y)
> 
>     would fold to `false` if x and y are scalars, but equivalent vector
>     comparisons were left untouched.  This patch enables folding of
>     patterns of the form
> 
>         (cmp x y) bit_and (cmp x y)
>         (cmp x y) bit_ior (cmp x y)
> 
>     for vector operands as well, ensuring consistent optimization across
>     all data types.
> 
>     PR tree-optimization/119196
> 
>     gcc/ChangeLog:
> 
>       * match.pd: Allow scalar optimizations with bitwise AND/OR to apply to 
> vectors.
> 
>     gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/vector-compare-5.c: Add new test for vector 
> compare simplification.
> 
> Signed-off-by: Icen Zeyada <icen.zeya...@arm.com>
> ---
>  gcc/match.pd                                  | 19 +++++++--
>  .../gcc.target/aarch64/vector-compare-5.c     | 41 +++++++++++++++++++
>  2 files changed, 56 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ab496d923cc0..a8a2e01e5e64 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3620,7 +3620,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (bit_and:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2))
>     (if ((TREE_CODE (@1) == INTEGER_CST
>        && TREE_CODE (@2) == INTEGER_CST)
> -     || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +     || ((ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
>            || POINTER_TYPE_P (TREE_TYPE (@1)))
>           && bitwise_equal_p (@1, @2)))
>      (with
> @@ -3697,7 +3697,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (bit_and (code1:c@3 @0 @1) (code2:c@4 @0 @2))
>    (if ((TREE_CODE (@1) == INTEGER_CST
>       && TREE_CODE (@2) == INTEGER_CST)
> -       || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +       || ((ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
>           || POINTER_TYPE_P (TREE_TYPE (@1)))
>          && operand_equal_p (@1, @2)))
>     (with
> @@ -3747,7 +3747,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (bit_ior:c (code1:c@3 @0 @1) (code2:c@4 (convert?@c0 @0) @2))
>     (if ((TREE_CODE (@1) == INTEGER_CST
>        && TREE_CODE (@2) == INTEGER_CST)
> -     || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +     || ((ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
>           || POINTER_TYPE_P (TREE_TYPE (@1)))
>           && bitwise_equal_p (@1, @2)))
>      (with
> @@ -3880,7 +3880,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>       rcmp (eq gt le eq ge lt)
>   (simplify
>    (eq:c (cmp1:c @0 @1) (cmp2 @0 @1))
> -  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
> +  (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +             || POINTER_TYPE_P (TREE_TYPE (@0)))
>      (rcmp @0 @1))))


For all of the above you need to ensure that we can expand the
vector comparison, by checking expand_vec_cmp_expr_p.

>  /* (type)([0,1]@a != 0) -> (type)a
> @@ -6510,6 +6511,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>               { build_int_cst (integer_type_node, prec - 1);}))))))
>  #endif
>  
> +(for op1 (simple_comparison)
> + (for op2 (simple_comparison)
> +  (for lop (bit_and bit_ior)
> +    (simplify
> +       (lop
> +        (vec_cond (op1 @0 @1) integer_minus_onep@2 integer_zerop@3)
> +        (vec_cond (op2 @0 @1) integer_minus_onep@2 integer_zerop@3))
> +       (with { tree op_type = truth_type_for (TREE_TYPE (@0)); }
> +             (vec_cond (lop:op_type (op1 @0 @1) (op2 @0 @1)) @2 @3))))))
> +

Likewise here; the vec_cond also needs to be checked via
expand_vec_cond_expr_p.  Why do you compute a "new" op_type here
instead of re-using the type of the (op1 @0 @1) compares, and the
compares themselves?  I'd have expected

> +    (simplify
> +       (lop
> +        (vec_cond (op1@00 @0 @1) integer_minus_onep@2 integer_zerop@3) 
> +        (vec_cond (op2@01 @0 @1) integer_minus_onep@2 integer_zerop@3))
          (vec_cond (lop @00 @01) @2 @3)

And why match a comparison at all?  Why not

    (simplify
      (lop
       (vec_cond @0 integer_minus_onep@2 integer_zerop@3)
       (vec_cond @1 @2 @3))
      (vec_cond (lop @0 @1) @2 @3))

?

IMO this pattern should be in a separate patch.

>  (for cnd (cond vec_cond)
>   /* (a != b) ? (a - b) : 0 -> (a - b) */
>   (simplify
> diff --git a/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c 
> b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> new file mode 100644
> index 000000000000..c4b95a21996d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-fdump-tree-original-all" } */
> +
> +typedef int v4i __attribute__((vector_size(4*sizeof(int))));
> +
> +/* Ensure we can simplify `VEC_COND_EXPR(a OP1 b) OP2 VEC_COND_EXPR(a OP3 b)`
> + * into `VEC_COND_EXPR(a OP4 b)`
> + */
> +
> +void use (v4i const *z);
> +
> +void
> +g (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> +  *z = *x > *y | *x == *y; // expect >=
> +  *t = *x > *y | *x <= *y; // expect true
> +}
> +
> +void
> +h (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> +  *z = *x <= *y & *x >= *y; // expect x == y
> +  *t = *x <= *y & *x != *y; // expect x<y
> +}
> +
> +void
> +i (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> +  *z = *x == *y | *x != *y; // expect true
> +  *t = *x == *y & *x != *y; // expect false
> +}
> +
> +/* { dg-final { scan-tree-dump 
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
>  "original" } } */
> +/* { dg-final { scan-tree-dump 
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump 
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*==\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
>  "original" } } */
> +/* { dg-final { scan-tree-dump 
> ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
>  "original" } } */
> +/* { dg-final { scan-tree-dump 
> ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump 
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
> +
> +
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

Reply via email to