fold_binary_loc assumed that if the type of the result wasn't a vector, the operands wouldn't be either. This isn't necessarily true for EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar for the mask as a whole.
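For context, the pattern the new tests exercise looks like this once the VEC_BOOL macro is expanded by hand (shown here for uint8_t and ==, purely as an illustration rather than as part of the patch).  When the loop is vectorized for SVE, lhs and rhs become predicate masks, so the comparison in the "if" is the kind of EQ_EXPR/NE_EXPR on vector masks described above:

/* Hand expansion of VEC_BOOL (cmpeq, ==, uint8_t, uint8_t) from the new
   test; illustration only, not part of the patch.  */
#include <stdint.h>
#include <stdbool.h>

void __attribute__ ((noinline, noclone))
vec_bool_cmpeq_uint8_t_uint8_t (uint8_t *dst, uint8_t *src,
                                uint8_t start, uint8_t n, uint8_t mask)
{
  for (uint8_t i = 0; i < n; i++)
    {
      bool lhs = i >= start;            /* becomes one vector mask */
      bool rhs = (i & mask) != 0x3D;    /* becomes another vector mask */
      if (lhs == rhs)                   /* comparison of the two masks */
        dst[i] = src[i];
    }
}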
Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
OK to install?

Richard


2017-11-06  Richard Sandiford  <richard.sandif...@linaro.org>
	    Alan Hayward  <alan.hayw...@arm.com>
	    David Sherwood  <david.sherw...@arm.com>

gcc/
	* fold-const.c (fold_binary_loc): Check the argument types rather
	than the result type when testing for a vector operation.

gcc/testsuite/
	* gcc.target/aarch64/sve_vec_bool_cmp_1.c: New test.
	* gcc.target/aarch64/sve_vec_bool_cmp_1_run.c: Likewise.

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2017-11-06 15:26:35.441288554 +0000
+++ gcc/fold-const.c	2017-11-06 15:26:35.595423552 +0000
@@ -9392,7 +9392,7 @@ fold_binary_loc (location_t loc,
   if ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR
        || code == EQ_EXPR || code == NE_EXPR)
-      && TREE_CODE (type) != VECTOR_TYPE
+      && TREE_CODE (TREE_TYPE (arg0)) != VECTOR_TYPE
       && ((truth_value_p (TREE_CODE (arg0))
	   && (truth_value_p (TREE_CODE (arg1))
	       || (TREE_CODE (arg1) == BIT_AND_EXPR
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)				\
+void __attribute__ ((noinline, noclone))				\
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src,	\
+					   INDUCTYPE start,		\
+					   INDUCTYPE n,			\
+					   INDUCTYPE mask)		\
+{									\
+  for (INDUCTYPE i = 0; i < n; i++)					\
+    {									\
+      bool lhs = i >= start;						\
+      bool rhs = (i & mask) != 0x3D;					\
+      if (lhs OP rhs)							\
+	dst[i] = src[i];						\
+    }									\
+}
+
+#define TEST_OP(T, NAME, OP)			\
+  T (NAME, OP, uint8_t, uint8_t)		\
+  T (NAME, OP, uint16_t, uint16_t)		\
+  T (NAME, OP, uint32_t, uint32_t)		\
+  T (NAME, OP, uint64_t, uint64_t)		\
+  T (NAME, OP, float, uint32_t)			\
+  T (NAME, OP, double, uint64_t)
+
+#define TEST_ALL(T)				\
+  TEST_OP (T, cmpeq, ==)			\
+  TEST_OP (T, cmpne, !=)
+
+TEST_ALL (VEC_BOOL)
+
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or.  */
+/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
+/* cmpeq will also contain a predicate not operation.  */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b\n} 6 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,37 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_vec_bool_cmp_1.c"
+
+#define N 103
+
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)		\
+{								\
+  INDUCTYPE i;							\
+  VARTYPE src[N];						\
+  VARTYPE dst[N];						\
+  for (i = 0; i < N; i++)					\
+    {								\
+      src[i] = i;						\
+      dst[i] = i * 2;						\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13,	\
+					     97, 0xFF);		\
+  for (i = 0; i < 13; i++)					\
+    if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2))		\
+      __builtin_abort ();					\
+  for (i = 13; i < 97; i++)					\
+    if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2))	\
+      __builtin_abort ();					\
+  for (i = 97; i < N; i++)					\
+    if (dst[i] != (i * 2))					\
+      __builtin_abort ();					\
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_VEC_BOOL)
+  return 0;
+}