Generalize existing scalar gimple_fold rules to apply the same bitwise comparison simplifications to vector types. Previously, an expression like
(x < y) && (x > y) would fold to `false` if x and y are scalars, but equivalent vector comparisons were left untouched. This patch enables folding of patterns of the form (cmp x y) bit_and (cmp x y) and (cmp x y) bit_ior (cmp x y) for vector operands as well, ensuring consistent optimization across all data types. PR tree-optimization/119196 gcc/ChangeLog: * match.pd: Allow scalar optimizations with bitwise AND/OR to apply to vectors. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector-compare-5.c: Add new test for vector compare simplification. Signed-off-by: Icen Zeyada <icen.zeya...@arm.com> --- gcc/match.pd | 11 ++++- .../gcc.target/aarch64/vector-compare-5.c | 49 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c diff --git a/gcc/match.pd b/gcc/match.pd index da60d6a22290..cf1bf3749853 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3635,6 +3635,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if ((TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) + || (VECTOR_TYPE_P (TREE_TYPE (@1)) + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2)) || POINTER_TYPE_P (TREE_TYPE (@1))) && bitwise_equal_p (@1, @2))) (with @@ -3712,6 +3714,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if ((TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) + || (VECTOR_TYPE_P (TREE_TYPE (@1)) + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2)) || POINTER_TYPE_P (TREE_TYPE (@1))) && operand_equal_p (@1, @2))) (with @@ -3762,6 +3766,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if ((TREE_CODE (@1) == INTEGER_CST && TREE_CODE (@2) == INTEGER_CST) || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) + || (VECTOR_TYPE_P (TREE_TYPE (@1)) + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2)) || POINTER_TYPE_P (TREE_TYPE (@1))) && bitwise_equal_p (@1, @2))) (with @@ -3894,7 +3900,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) rcmp 
(eq gt le eq ge lt) (simplify (eq:c (cmp1:c @0 @1) (cmp2 @0 @1)) - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + || POINTER_TYPE_P (TREE_TYPE (@0)) + || (VECTOR_TYPE_P (TREE_TYPE (@0)) + && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp))) (rcmp @0 @1)))) /* (type)([0,1]@a != 0) -> (type)a diff --git a/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c new file mode 100644 index 000000000000..59ab56c4e255 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-original-all" } */ + +typedef int v4i __attribute__((vector_size(4*sizeof(int)))); + +/* Ensure we can simplify `VEC_COND_EXPR(a OP1 b) OP2 VEC_COND_EXPR(a OP3 b)` + * into `VEC_COND_EXPR(a OP4 b)` + */ + +void use (v4i const *z); + +void +g (v4i *x, v4i const *y, v4i *z, v4i *t) +{ + *z = *x > *y | *x == *y; // expect >= + *t = *x > *y | *x <= *y; // expect true +} + +void +h (v4i *x, v4i const *y, v4i *z, v4i *t) +{ + *z = *x <= *y & *x >= *y; // expect x == y + *t = *x <= *y & *x != *y; // expect x<y +} + +void +i (v4i *x, v4i const *y, v4i *z, v4i *t) +{ + *z = *x == *y | *x != *y; // expect true + *t = *x == *y & *x != *y; // expect false +} + +void +k (v4i *x, v4i const *y, v4i *z, v4i *t) +{ + *z = *x < *y | *x == *y; // x <= y + *t = *x < *y & *x > *y; // expect false +} + + +/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump 
".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*==\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */ +/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */ -- 2.43.0